Unverified commit a357ed50, authored by Joao Gante and committed by GitHub
Browse files

Generate: add warning when left padding should be used (#19067)

* add warning when left padding should be used

* PyTorch: check that a pad token is defined; Flax: the check can only run when not tracing
parent 942fa8ce
...@@ -326,6 +326,14 @@ class FlaxGenerationMixin: ...@@ -326,6 +326,14 @@ class FlaxGenerationMixin:
if decoder_start_token_id is None and self.config.is_encoder_decoder: if decoder_start_token_id is None and self.config.is_encoder_decoder:
raise ValueError("`decoder_start_token_id` has to be defined for encoder-decoder generation.") raise ValueError("`decoder_start_token_id` has to be defined for encoder-decoder generation.")
# decoder-only models should use left-padding for generation (can't be checked with `trace=True`)
if not self.config.is_encoder_decoder and not trace:
if pad_token_id is not None and jnp.sum(input_ids[:, -1] == pad_token_id) > 0:
logger.warning(
"A decoder-only architecture is being used, but right-padding was detected! For correct "
"generation results, please set `padding_side='left'` when initializing the tokenizer."
)
if self.config.is_encoder_decoder: if self.config.is_encoder_decoder:
# add encoder_outputs to model_kwargs # add encoder_outputs to model_kwargs
if model_kwargs.get("encoder_outputs") is None: if model_kwargs.get("encoder_outputs") is None:
......
...@@ -1605,6 +1605,14 @@ class TFGenerationMixin: ...@@ -1605,6 +1605,14 @@ class TFGenerationMixin:
input_ids, pad_token_id, eos_token_id input_ids, pad_token_id, eos_token_id
) )
# decoder-only models should use left-padding for generation
if not self.config.is_encoder_decoder:
if pad_token_id is not None and tf.math.reduce_any(input_ids[:, -1] == pad_token_id):
logger.warning(
"A decoder-only architecture is being used, but right-padding was detected! For correct "
"generation results, please set `padding_side='left'` when initializing the tokenizer."
)
# 4. Prepare model inputs which will be used for auto-regressive generation # 4. Prepare model inputs which will be used for auto-regressive generation
if self.config.is_encoder_decoder: if self.config.is_encoder_decoder:
# if encoder-decoder, we create encoder_outputs and add to `model_kwargs` # if encoder-decoder, we create encoder_outputs and add to `model_kwargs`
......
...@@ -1229,6 +1229,14 @@ class GenerationMixin: ...@@ -1229,6 +1229,14 @@ class GenerationMixin:
inputs_tensor, pad_token_id, eos_token_id inputs_tensor, pad_token_id, eos_token_id
) )
# decoder-only models should use left-padding for generation
if not self.config.is_encoder_decoder:
if pad_token_id is not None and torch.sum(inputs_tensor[:, -1] == pad_token_id) > 0:
logger.warning(
"A decoder-only architecture is being used, but right-padding was detected! For correct "
"generation results, please set `padding_side='left'` when initializing the tokenizer."
)
if self.config.is_encoder_decoder and "encoder_outputs" not in model_kwargs: if self.config.is_encoder_decoder and "encoder_outputs" not in model_kwargs:
# if model is encoder decoder encoder_outputs are created # if model is encoder decoder encoder_outputs are created
# and added to `model_kwargs` # and added to `model_kwargs`
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment