Unverified Commit 19fb1e22 authored by Moshe Berchansky, committed by GitHub

added the max_matching_ngram_size to GenerationConfig (#29131)



* added the max_matching_ngram_size parameter to GenerationConfig, for the PromptLookupCandidateGenerator

* switched back to keyword arguments

* added a docstring for the PromptLookupCandidateGenerator parameters

* ruff reformat

* Update src/transformers/generation/configuration_utils.py
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>

---------
Co-authored-by: Joao Gante <joaofranciscocardosogante@gmail.com>
Co-authored-by: Arthur <48595927+ArthurZucker@users.noreply.github.com>
parent ddb4fda3
src/transformers/generation/candidate_generator.py
@@ -252,10 +252,10 @@ class PromptLookupCandidateGenerator(CandidateGenerator):
     def __init__(
         self,
         num_output_tokens: int = 10,
-        max_matching_ngram_size: int = 2,
+        max_matching_ngram_size: int = None,
     ):
         self.num_output_tokens = num_output_tokens
-        self.max_matching_ngram_size = max_matching_ngram_size
+        self.max_matching_ngram_size = max_matching_ngram_size if max_matching_ngram_size else 2
         if self.max_matching_ngram_size <= 0 or self.num_output_tokens <= 0:
             raise ValueError("Invalid max_matching_ngram_size or num_output_tokens")
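For intuition, the generator uses these two parameters to scan the prompt for the longest n-gram (up to `max_matching_ngram_size`) that matches the end of the current sequence, then drafts the `num_output_tokens` tokens that followed that match. A minimal sketch of the idea, not the library's exact code:

```python
import torch

def prompt_lookup_candidates(
    input_ids: torch.LongTensor,          # shape (1, seq_len)
    max_matching_ngram_size: int = 2,
    num_output_tokens: int = 10,
):
    """Illustrative sketch of prompt lookup decoding, not the exact implementation."""
    seq = input_ids[0]
    seq_len = seq.shape[0]
    # Try longer ngrams first: they give higher-precision matches.
    for ngram_size in range(min(max_matching_ngram_size, seq_len - 1), 0, -1):
        tail = seq[-ngram_size:]
        windows = seq.unfold(0, ngram_size, 1)            # all sliding ngram windows
        matches = (windows == tail).all(dim=1).nonzero().flatten()
        for pos in matches.tolist():
            start = pos + ngram_size
            end = min(start + num_output_tokens, seq_len)
            if start < end:                               # skips the match that is the tail itself
                return seq[start:end]                     # draft candidate tokens
    return None                                           # no match found in the prompt
```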
src/transformers/generation/configuration_utils.py
@@ -279,11 +279,18 @@ class GenerationConfig(PushToHubMixin):
             - `"heuristic_transient"`: Same as `"heuristic"` but `num_assistant_tokens` is reset to its initial value after each generation call.
             - `"constant"`: `num_assistant_tokens` stays unchanged during generation
+        prompt_lookup_num_tokens (`int`, *optional*, defaults to `None`):
+            The number of tokens to be output as candidate tokens.
+        max_matching_ngram_size (`int`, *optional*, defaults to `None`):
+            The maximum ngram size to be considered for matching in the prompt. Defaults to 2 if not provided.
 
         > Parameters specific to the caching mechanism:
 
         cache_implementation (`str`, *optional*, defaults to `None`):
             Cache class that should be used when generating.
 
         > Wild card
 
         generation_kwargs:
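As documented above, both knobs are plain `generate()` kwargs. A quick usage sketch (the checkpoint name is only an example; any causal LM works):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("The quick brown fox jumps over the lazy dog. The quick brown", return_tensors="pt")
outputs = model.generate(
    **inputs,
    prompt_lookup_num_tokens=10,  # turns on prompt lookup decoding
    max_matching_ngram_size=3,    # the new parameter added by this PR
    max_new_tokens=20,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```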
src/transformers/generation/configuration_utils.py
@@ -360,6 +367,7 @@ class GenerationConfig(PushToHubMixin):
         # Prompt lookup decoding
         self.prompt_lookup_num_tokens = kwargs.pop("prompt_lookup_num_tokens", None)
+        self.max_matching_ngram_size = kwargs.pop("max_matching_ngram_size", None)
 
         # Wild card
         self.generation_kwargs = kwargs.pop("generation_kwargs", {})
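Since the value is popped from kwargs like every other generation parameter, it can also be set on a standalone config:

```python
from transformers import GenerationConfig

config = GenerationConfig(prompt_lookup_num_tokens=10, max_matching_ngram_size=3)
print(config.max_matching_ngram_size)  # 3; stays None if omitted (the generator then uses 2)
```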
src/transformers/generation/utils.py
@@ -688,6 +688,7 @@ class GenerationMixin:
         if generation_config.prompt_lookup_num_tokens is not None:
             candidate_generator = PromptLookupCandidateGenerator(
                 num_output_tokens=generation_config.prompt_lookup_num_tokens,
+                max_matching_ngram_size=generation_config.max_matching_ngram_size,
             )
         else:
             candidate_generator = AssistedCandidateGenerator(
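Because the config default is `None`, the fallback to 2 in the generator's `__init__` (first hunk above) still applies when a user sets only `prompt_lookup_num_tokens`. A check of that behavior, assuming the internal module path `transformers.generation.candidate_generator`:

```python
from transformers.generation.candidate_generator import PromptLookupCandidateGenerator

gen = PromptLookupCandidateGenerator(num_output_tokens=10, max_matching_ngram_size=None)
assert gen.max_matching_ngram_size == 2  # None falls back to the previous default of 2
```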