Generate: better `compute_transition_scores` examples (#21323)

2b8feffa · Joao Gante · GitHub · 449df41f · 2b8feffa
Unverified Commit 2b8feffa authored Jan 26, 2023 by Joao Gante Committed by GitHub Jan 26, 2023
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 3 deletions

src/transformers/generation/utils.py src/transformers/generation/utils.py +6 -3

No files found.
--- a/src/transformers/generation/utils.py
+++ b/src/transformers/generation/utils.py
@@ -971,7 +971,9 @@ class GenerationMixin:
        >>> transition_scores = model.compute_transition_scores(
        ...     outputs.sequences, outputs.scores, normalize_logits=True
        ... )
-        >>> input_length = inputs.input_ids.shape[1]
+        >>> # input_length is the length of the input prompt for decoder-only models, like the GPT family, and 1 for
+        >>> # encoder-decoder models, like BART or T5.
+        >>> input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
        >>> generated_tokens = outputs.sequences[:, input_length:]
        >>> for tok, score in zip(generated_tokens[0], transition_scores[0]):
        ...     # | token | token string | logits | probability
@@ -995,8 +997,9 @@ class GenerationMixin:
        ...     outputs.sequences, outputs.scores, outputs.beam_indices, normalize_logits=False
        ... )
        >>> # If you sum the generated tokens' scores and apply the length penalty, you'll get the sequence scores.
-        >>> # Tip: set `normalize_logits=True` to recompute the scores from the normalized logits.
+        >>> # Tip: recomputing the scores is only guaranteed to match with `normalize_logits=False`. Depending on the
-        >>> output_length = inputs.input_ids.shape[1] + np.sum(transition_scores.numpy() < 0, axis=1)
+        >>> # use case, you might want to recompute it with `normalize_logits=True`.
+        >>> output_length = input_length + np.sum(transition_scores.numpy() < 0, axis=1)
        >>> length_penalty = model.generation_config.length_penalty
        >>> reconstructed_scores = transition_scores.sum(axis=1) / (output_length**length_penalty)
        >>> print(np.allclose(outputs.sequences_scores, reconstructed_scores))