Unverified commit edd68f4e authored by Matt, committed by GitHub

🚨 No more default chat templates (#31733)

* No more default chat templates

* Add the template to the GPT-SW3 tests since it's not available by default now

* Fix GPT2 test

* Fix Bloom test

* Fix Bloom test

* Remove default templates again
parent 1c122a46
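Migration note: code that previously relied on a class-level default should now set `chat_template` explicitly before calling `apply_chat_template`. A minimal sketch under stated assumptions (the checkpoint name is illustrative; the template string reproduces the old GPT-2-style default):

```python
from transformers import AutoTokenizer

# Illustrative checkpoint; the point is that the template must now be set by hand.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Reproduces the removed GPT-2 default: concatenate messages with EOS tokens.
tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"

chat = [{"role": "user", "content": "Hello!"}]
ids = tokenizer.apply_chat_template(chat, tokenize=True)

# Saving persists the template in tokenizer_config.json, so later loads need no fallback.
tokenizer.save_pretrained("./gpt2-with-template")
```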
@@ -241,61 +241,6 @@ class LlamaTokenizerFast(PreTrainedTokenizerFast):
return (out_vocab_file,)
@property
# Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.default_chat_template
def default_chat_template(self):
"""
LLaMA uses [INST] and [/INST] to indicate user messages, and <<SYS>> and <</SYS>> to indicate system messages.
Assistant messages do not have special tokens, because LLaMA chat models are generally trained with strict
user/assistant/user/assistant message ordering, and so assistant messages can be identified from the ordering
rather than needing special tokens. The system message is partly 'embedded' in the first user message, which
results in an unusual token ordering when it is present. This template should definitely be changed if you wish
to fine-tune a model with more flexible role ordering!
The output should look something like:
<bos>[INST] B_SYS SystemPrompt E_SYS Prompt [/INST] Answer <eos><bos>[INST] Prompt [/INST] Answer <eos>
<bos>[INST] Prompt [/INST]
The reference for this chat template is [this code
snippet](https://github.com/facebookresearch/llama/blob/556949fdfb72da27c2f4a40b7f0e4cf0b8153a28/llama/generation.py#L320-L362)
in the original repository.
"""
template = (
"{% if messages[0]['role'] == 'system' %}"
"{% set loop_messages = messages[1:] %}" # Extract system message if it's present
"{% set system_message = messages[0]['content'] %}"
"{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}"
"{% set loop_messages = messages %}" # Or use the default system message if the flag is set
"{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}"
"{% else %}"
"{% set loop_messages = messages %}"
"{% set system_message = false %}"
"{% endif %}"
"{% for message in loop_messages %}" # Loop over all non-system messages
"{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
"{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}"
"{% endif %}"
"{% if loop.index0 == 0 and system_message != false %}" # Embed system message in first message
"{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}"
"{% else %}"
"{% set content = message['content'] %}"
"{% endif %}"
"{% if message['role'] == 'user' %}" # After all of that, handle messages/roles in a fairly normal way
"{{ bos_token + '[INST] ' + content.strip() + ' [/INST]' }}"
"{% elif message['role'] == 'system' %}"
"{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}"
"{% elif message['role'] == 'assistant' %}"
"{{ ' ' + content.strip() + ' ' + eos_token }}"
"{% endif %}"
"{% endfor %}"
)
template = template.replace("USE_DEFAULT_PROMPT", "true" if self.use_default_system_prompt else "false")
default_message = DEFAULT_SYSTEM_PROMPT.replace("\n", "\\n").replace("'", "\\'")
template = template.replace("DEFAULT_SYSTEM_MESSAGE", default_message)
return template
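After this removal, the template must live on the checkpoint itself. A hedged usage sketch (the Llama 2 chat checkpoints on the Hub ship this template in `tokenizer_config.json`; the repo name below is illustrative and gated):

```python
from transformers import AutoTokenizer

# Gated, illustrative checkpoint; its saved template matches the one removed above.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

messages = [
    {"role": "system", "content": "You are a pirate."},
    {"role": "user", "content": "Hi!"},
]
text = tokenizer.apply_chat_template(messages, tokenize=False)
# Expected shape: '<s>[INST] <<SYS>>\nYou are a pirate.\n<</SYS>>\n\nHi! [/INST]'
```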
# TODO ArthurZ let's rely on the template processor instead, refactor all fast tokenizers
# Copied from transformers.models.llama.tokenization_llama.LlamaTokenizer.build_inputs_with_special_tokens
def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
@@ -159,63 +159,3 @@ class LlavaNextVideoProcessor(ProcessorMixin):
tokenizer_input_names = self.tokenizer.model_input_names
image_processor_input_names = self.image_processor.model_input_names
return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
@property
def default_chat_template(self):
"""
This default vicuna template formats inputs in the form of a chat history. For each message in the chat history:
* the template will output the role of the speaker followed by the content of the message.
* content is a list of strings and images.
* If the content element is an image or a video, the template will output a sequence of `<image>` or `<video>` tokens.
Example:
```python
messages = [{
"role": "user",
"content": [
{"type": "text", "text": "What’s the content of this video?"},
{"type": "video"},
],
},
{
"role": "assistant",
"content": [{"type": "text", "text": "This picture shows a red stop sign."},]
}]
```
Will create outputs like:
```
USER: <video>\nWhat’s the content of this video?
ASSISTANT: This picture shows a red stop sign.
```
"""
# fmt: off
return (
"{% for message in messages %}"
"{% if message['role'] == 'system' %}"
"{{ message['content'][0]['text'] }}"
"{% else %}"
"{{ message['role'].upper() + ': '}}"
"{% endif %}"
"{# Render all images first #}"
"{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}"
"{{ '<image>\n' }}"
"{% endfor %}"
"{# Render all videos next #}"
"{% for content in message['content'] | selectattr('type', 'equalto', 'video') %}"
"{{ '<video>\n' }}"
"{% endfor %}"
"{# Render all text finally #}"
"{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}"
"{{ content['text'] + ' '}}"
"{% endfor %}"
"{% endfor %}"
"{% if add_generation_prompt %}"
"{{ 'ASSISTANT:' }}"
"{% endif %}"
)
# fmt: on
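A hedged sketch of the replacement workflow: load a checkpoint whose processor config saves its own chat template (the repo name is illustrative), then call `apply_chat_template` as before:

```python
from transformers import LlavaNextVideoProcessor

# Illustrative checkpoint; current Hub repos save their own chat template,
# so the class-level default removed above is no longer needed.
processor = LlavaNextVideoProcessor.from_pretrained("llava-hf/LLaVA-NeXT-Video-7B-hf")

messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "What’s the content of this video?"},
        {"type": "video"},
    ],
}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
# Expected shape: "USER: <video>\nWhat’s the content of this video? ASSISTANT:"
```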
@@ -810,14 +810,6 @@ class WhisperTokenizer(PreTrainedTokenizer):
text = " " + text
return (text, kwargs)
@property
# Copied from transformers.models.gpt2.tokenization_gpt2.GPT2Tokenizer.default_chat_template
def default_chat_template(self):
"""
A simple chat template that ignores role information and just concatenates messages with EOS tokens.
"""
return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
def get_decoder_prompt_ids(self, task=None, language=None, no_timestamps=True):
self.set_prefix_tokens(task=task, language=language, predict_timestamps=not no_timestamps)
# prefix tokens are of the form: <|startoftranscript|> <|lang_id|> <|task|> <|notimestamps|>
@@ -539,63 +539,3 @@ class WhisperTokenizerFast(PreTrainedTokenizerFast):
return prefix_ones + ([0] * len(token_ids_0)) + suffix_ones
return prefix_ones + ([0] * len(token_ids_0)) + ([0] * len(token_ids_1)) + suffix_ones
@property
# Copied from transformers.models.gpt2.tokenization_gpt2.GPT2Tokenizer.default_chat_template
def default_chat_template(self):
"""
A simple chat template that ignores role information and just concatenates messages with EOS tokens.
"""
return "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
# Copied from transformers.models.whisper.tokenization_whisper.WhisperTokenizer.get_decoder_prompt_ids
def get_decoder_prompt_ids(self, task=None, language=None, no_timestamps=True):
self.set_prefix_tokens(task=task, language=language, predict_timestamps=not no_timestamps)
@@ -971,8 +971,8 @@ class ProcessorMixin(PushToHubMixin):
conversation (`List[Dict[str, str]]`):
The conversation to format.
chat_template (`Optional[str]`, *optional*):
The Jinja template to use for formatting the conversation. If not provided, the default chat template
is used.
The Jinja template to use for formatting the conversation. If not provided, the tokenizer's
chat template is used.
tokenize (`bool`, *optional*, defaults to `False`):
Whether to tokenize the output or not.
**kwargs:
@@ -982,15 +982,6 @@ class ProcessorMixin(PushToHubMixin):
if chat_template is None:
if self.chat_template is not None:
chat_template = self.chat_template
elif getattr(self, "default_chat_template", None) is not None:
logger.warning_once(
"No chat template is set for this processor, falling back to a default class-level template. This is "
"very error-prone, because models are often trained with templates different from the class default! "
"Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
chat_template = self.default_chat_template
else:
raise ValueError(
"No chat template is set for this processor. Please either set the `chat_template` attribute, "
@@ -1704,8 +1704,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
"""
Converts a list of dictionaries with `"role"` and `"content"` keys to a list of token
ids. This method is intended for use with chat models, and will read the tokenizer's chat_template attribute to
determine the format and control tokens to use when converting. When chat_template is None, it will fall back
to the default_chat_template specified at the class level.
determine the format and control tokens to use when converting.
Args:
conversation (Union[List[Dict[str, str]], List[List[Dict[str, str]]]]): A list of dicts
@@ -1986,22 +1985,12 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
Returns:
`str`: The chat template string.
"""
using_default_template = False
# First, handle the cases when the model has a dict of multiple templates
if isinstance(self.chat_template, dict) or (
self.chat_template is None and isinstance(self.default_chat_template, dict)
):
if self.chat_template is not None:
template_dict = self.chat_template
using_default_dict = False
else:
template_dict = self.default_chat_template
using_default_dict = True
if isinstance(self.chat_template, dict):
template_dict = self.chat_template
if chat_template is not None and chat_template in template_dict:
# The user can pass the name of a template to the chat template argument instead of an entire template
chat_template = template_dict[chat_template]
if using_default_dict:
using_default_template = True
elif chat_template is None:
if tools is not None and "tool_use" in template_dict:
chat_template = template_dict["tool_use"]
@@ -2013,44 +2002,23 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
"template or the name of the template you wish to use to the `chat_template` argument. Available "
f"template names are {sorted(template_dict.keys())}."
)
if using_default_dict:
using_default_template = True
elif chat_template is None:
# These are the cases when the model has a single template
# priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template`
# priority: `chat_template` argument > `tokenizer.chat_template`
if self.chat_template is not None:
chat_template = self.chat_template
else:
chat_template = self.default_chat_template
using_default_template = True
if using_default_template:
logger.warning_once(
"No chat template is set for this tokenizer, falling back to a default class-level template. This is "
"very error-prone, because models are often trained with templates different from the class default! "
"Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
"point any code depending on them will stop working. We recommend setting a valid chat template before "
"then to ensure that this model continues working without issues."
)
else:
raise ValueError(
"Cannot use apply_chat_template() because tokenizer.chat_template is not set and no template "
"argument was passed! For information about writing templates and setting the "
"tokenizer.chat_template attribute, please see the documentation at "
"https://huggingface.co/docs/transformers/main/en/chat_templating"
)
return chat_template
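The dict branch above enables named templates. A hedged sketch of selection by name (the checkpoint name and template strings are toy examples):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative

# chat_template may be a dict mapping template names to template strings.
tokenizer.chat_template = {
    "default": "{% for m in messages %}{{ m['content'] }}{{ eos_token }}{% endfor %}",
    "tool_use": "{% for m in messages %}[TOOL] {{ m['content'] }}{{ eos_token }}{% endfor %}",
}

chat = [{"role": "user", "content": "Hello!"}]

tokenizer.apply_chat_template(chat, tokenize=False)                            # uses "default"
tokenizer.apply_chat_template(chat, chat_template="tool_use", tokenize=False)  # selects by name
```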
@property
def default_chat_template(self):
"""
This template formats inputs in the standard ChatML format. See
https://github.com/openai/openai-python/blob/main/chatml.md
"""
return (
"{% for message in messages %}"
"{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
"{% endfor %}"
"{% if add_generation_prompt %}"
"{{ '<|im_start|>assistant\n' }}"
"{% endif %}"
)
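For reference, the removed ChatML default rendered standalone (a sketch using Jinja2 directly, outside of any tokenizer):

```python
from jinja2 import Environment

chatml = Environment().from_string(
    "{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|im_start|>assistant\n' }}"
    "{% endif %}"
)
print(chatml.render(messages=[{"role": "user", "content": "Hello!"}], add_generation_prompt=True))
# <|im_start|>user
# Hello!<|im_end|>
# <|im_start|>assistant
```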
@classmethod
def from_pretrained(
cls,
@@ -135,6 +135,7 @@ class BloomTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = self.get_rust_tokenizer()
tokenizer.chat_template = "{% for message in messages %}" "{{ message.content }}{{ eos_token }}" "{% endfor %}"
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
[
@@ -280,6 +280,7 @@ class GPT2TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = GPT2Tokenizer.from_pretrained(self.tmpdirname)
tokenizer.chat_template = "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}"
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],
[
@@ -131,6 +131,15 @@ class GPTSw3TokenizationTest(TokenizerTesterMixin, unittest.TestCase):
@require_jinja
def test_tokenization_for_chat(self):
tokenizer = GPTSw3Tokenizer(SAMPLE_VOCAB)
tokenizer.chat_template = (
"{{ eos_token }}{{ bos_token }}"
"{% for message in messages %}"
"{% if message['role'] == 'user' %}{{ 'User: ' + message['content']}}"
"{% else %}{{ 'Bot: ' + message['content']}}{% endif %}"
"{{ message['text'] }}{{ bos_token }}"
"{% endfor %}"
"Bot:"
)
# This is in English, but it's just here to make sure the chat control tokens are being added properly
test_chats = [
[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": "Hello!"}],