"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "ba52dec47f870ab713317d3e61ba209bb5800783"
Unverified commit fa906a26, authored by Silver, committed by GitHub

Add `min_new_tokens` argument in generate() (implementation based on `MinNewTokensLengthLogitsProcessor`) (#21044)

Adds a new parameter `min_new_tokens` for generate().
parent 125f1375
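
Not part of the commit itself, but a minimal sketch of how the new argument is meant to be used once this lands, assuming a standard causal LM checkpoint such as gpt2:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("The weather today is", return_tensors="pt")

# min_new_tokens counts only generated tokens; min_length also counts the prompt.
outputs = model.generate(**inputs, min_new_tokens=10, max_new_tokens=30)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))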
src/transformers/generation/configuration_utils.py
@@ -75,7 +75,11 @@ class GenerationConfig(PushToHubMixin):
         max_new_tokens (`int`, *optional*):
             The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt.
         min_length (`int`, *optional*, defaults to 0):
-            The minimum length of the sequence to be generated.
+            The minimum length of the sequence to be generated. Corresponds to the length of the input prompt +
+            `min_new_tokens`. In general, prefer the use of `min_new_tokens`, which ignores the number of tokens in the
+            prompt.
+        min_new_tokens (`int`, *optional*):
+            The minimum numbers of tokens to generate, ignoring the number of tokens in the prompt.
         early_stopping (`bool`, *optional*, defaults to `False`):
             Whether to stop the beam search when at least `num_beams` sentences are finished per batch or not.
         max_time(`float`, *optional*):
@@ -207,6 +211,7 @@ class GenerationConfig(PushToHubMixin):
         self.max_length = kwargs.pop("max_length", 20)
         self.max_new_tokens = kwargs.pop("max_new_tokens", None)
         self.min_length = kwargs.pop("min_length", 0)
+        self.min_new_tokens = kwargs.pop("min_new_tokens", None)
         self.early_stopping = kwargs.pop("early_stopping", False)
         self.max_time = kwargs.pop("max_time", None)
...
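
Because the hunk above pops `min_new_tokens` in `GenerationConfig.__init__`, the value can also be carried in a config object rather than passed as a keyword; a small sketch:

from transformers import GenerationConfig

# min_new_tokens is stored alongside the other length controls
generation_config = GenerationConfig(min_new_tokens=5, max_new_tokens=20)
print(generation_config.min_new_tokens)  # 5
# and later: model.generate(**inputs, generation_config=generation_config)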
src/transformers/generation/utils.py
@@ -48,6 +48,7 @@ from .logits_process import (
     LogitNormalization,
     LogitsProcessorList,
     MinLengthLogitsProcessor,
+    MinNewTokensLengthLogitsProcessor,
     NoBadWordsLogitsProcessor,
     NoRepeatNGramLogitsProcessor,
     PrefixConstrainedLogitsProcessor,
@@ -822,6 +823,16 @@ class GenerationMixin:
             and generation_config.min_length > 0
         ):
             processors.append(MinLengthLogitsProcessor(generation_config.min_length, generation_config.eos_token_id))
+        if (
+            generation_config.min_new_tokens is not None
+            and generation_config.eos_token_id is not None
+            and generation_config.min_new_tokens > 0
+        ):
+            processors.append(
+                MinNewTokensLengthLogitsProcessor(
+                    input_ids_seq_length, generation_config.min_new_tokens, generation_config.eos_token_id
+                )
+            )
         if prefix_allowed_tokens_fn is not None:
             processors.append(
                 PrefixConstrainedLogitsProcessor(
...
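
For reference (not part of the diff): the processor wired in above suppresses the EOS logit until enough tokens have been generated past the prompt. A standalone sketch with made-up token ids:

import torch
from transformers import MinNewTokensLengthLogitsProcessor

eos_token_id = 2
# Prompt is 4 tokens long; require at least 3 newly generated tokens.
processor = MinNewTokensLengthLogitsProcessor(
    prompt_length_to_skip=4, min_new_tokens=3, eos_token_id=eos_token_id
)

input_ids = torch.tensor([[5, 6, 7, 8, 9]])  # 4 prompt tokens + 1 generated
scores = torch.zeros(1, 10)
out = processor(input_ids, scores)
# Only 1 new token so far (< 3), so EOS is forced to -inf.
print(out[0, eos_token_id])  # tensor(-inf)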