"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "0951d31788251d11c9f9e8352853edd071297cb4"
Unverified commit c2882403 authored by Arthur, committed by GitHub

[Whisper Docs] Nits (#24367)



* nits

* config doc did not match

* Apply suggestions from code review
Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>

---------
Co-authored-by: Sanchit Gandhi <93869735+sanchit-gandhi@users.noreply.github.com>
parent 83dc5762
@@ -127,7 +127,7 @@ class WhisperConfig(PretrainedConfig):
             Padding token id.
         bos_token_id (`int`, *optional*, defaults to 50256):
             Begin of stream token id.
-        eos_token_id (`int`, *optional*, defaults to 50257):
+        eos_token_id (`int`, *optional*, defaults to 50256):
             End of stream token id.
         suppress_tokens (`List[int]`, *optional*):
             A list containing the non-speech tokens that will be used by the logit processor in the `generate`
@@ -216,7 +216,7 @@ class WhisperConfig(PretrainedConfig):
         max_source_positions=1500,
         max_target_positions=448,
         pad_token_id=50256,
-        bos_token_id=50257,
+        bos_token_id=50256,
         eos_token_id=50256,
         suppress_tokens=None,
         begin_suppress_tokens=[220, 50256],
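The two hunks above bring the documented defaults in line with the `__init__` signature: `bos_token_id` and the `eos_token_id` docstring now both read 50256. As a quick check, a minimal sketch (not part of this commit) instantiates the default config and prints the fields touched here; the exact ids depend on the installed transformers version, and decoding starts from `decoder_start_token_id`, not from `bos_token_id`.

```python
from transformers import WhisperConfig

# All-default Whisper configuration, matching the docstring above.
config = WhisperConfig()

# pad/bos/eos token ids are now documented (and set) as 50256.
print(config.pad_token_id, config.bos_token_id, config.eos_token_id)

# Generation does not begin with bos_token_id: the decoder is seeded with
# decoder_start_token_id, which corresponds to the <|startoftranscript|> token.
print(config.decoder_start_token_id)
```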
@@ -231,8 +231,9 @@ class WhisperTokenizer(PreTrainedTokenizer):
         unk_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
             The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
             token instead.
-        bos_token (`str`, *optional*, defaults to `"<|startoftranscript|>"`):
-            The beginning of sequence token.
+        bos_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
+            The beginning of sequence token. The `decoder_start_token_id` is used to set the first token as
+            `"<|startoftranscript|>"` when generating.
         eos_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
             The end of sequence token.
         add_prefix_space (`bool`, *optional*, defaults to `False`):
@@ -261,7 +262,7 @@ class WhisperTokenizer(PreTrainedTokenizer):
         normalizer_file=None,
         errors="replace",
         unk_token="<|endoftext|>",
-        bos_token="<|startoftranscript|>",
+        bos_token="<|endoftext|>",
         eos_token="<|endoftext|>",
         pad_token=None,
         add_prefix_space=False,
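The docstring change above is the key nit: `bos_token` now documents the actual default, `"<|endoftext|>"`, while the first generated token is still `"<|startoftranscript|>"` via `decoder_start_token_id`. A minimal sketch (not part of this commit) of what that looks like in practice, using `"openai/whisper-tiny"` purely as an example checkpoint:

```python
from transformers import WhisperTokenizer

# Example checkpoint only; any Whisper checkpoint behaves the same way.
tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-tiny")

# bos_token and eos_token are both <|endoftext|> for the released checkpoints.
print(tokenizer.bos_token, tokenizer.eos_token)

# Encoded sequences nevertheless begin with <|startoftranscript|>: the tokenizer
# prepends its prefix tokens, and generate() seeds the decoder with
# decoder_start_token_id rather than bos_token_id.
ids = tokenizer("hello world").input_ids
print(tokenizer.convert_ids_to_tokens(ids)[:3])
```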
@@ -105,8 +105,9 @@ class WhisperTokenizerFast(PreTrainedTokenizerFast):
         unk_token (`str`, *optional*, defaults to `<|endoftext|>`):
             The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
             token instead.
-        bos_token (`str`, *optional*, defaults to `<|startoftranscript|>`):
-            The beginning of sequence token.
+        bos_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
+            The beginning of sequence token. The `decoder_start_token_id` is used to set the first token as
+            `"<|startoftranscript|>"` when generating.
         eos_token (`str`, *optional*, defaults to `<|endoftext|>`):
             The end of sequence token.
         add_prefix_space (`bool`, *optional*, defaults to `False`):
@@ -138,7 +139,7 @@ class WhisperTokenizerFast(PreTrainedTokenizerFast):
         normalizer_file=None,
         tokenizer_file=None,
         unk_token="<|endoftext|>",
-        bos_token="<|startoftranscript|>",
+        bos_token="<|endoftext|>",
         eos_token="<|endoftext|>",
         add_prefix_space=False,
         language=None,
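The same fix is mirrored in `WhisperTokenizerFast`, so the slow and fast tokenizers now document the same `bos_token` default. A small sketch (again assuming the `"openai/whisper-tiny"` example checkpoint) to confirm the two stay in sync:

```python
from transformers import WhisperTokenizer, WhisperTokenizerFast

# Example checkpoint only.
slow = WhisperTokenizer.from_pretrained("openai/whisper-tiny")
fast = WhisperTokenizerFast.from_pretrained("openai/whisper-tiny")

# Both should report <|endoftext|> as bos_token ...
print(slow.bos_token, fast.bos_token)

# ... and both should produce the same <|startoftranscript|>-prefixed ids.
print(slow("hi").input_ids == fast("hi").input_ids)
```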