Unverified Commit 5ea2595e authored by JB (Don), committed by GitHub

Add warning for missing attention mask when pad tokens are detected (#25345)

* Add attention mask and pad token warning to many of the models

* Remove changes under examples/research_projects

These files are not maintained by HF.

* Skip the warning check during torch.fx or JIT tracing

* Switch ordering for the warning and input shape assignment

This ordering is a little cleaner in some of the cases.

* Add missing line break in one of the files
parent 6ea3ee3c
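
The user-visible effect of this commit: a forward pass whose input_ids contain the model's pad_token_id but that receives no attention_mask now logs a one-time warning. A minimal sketch of the behavior, assuming a standard checkpoint such as bert-base-uncased (any model whose config defines pad_token_id behaves the same way):

from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

# Padding the batch writes pad_token_id into the shorter sequence.
batch = tokenizer(
    ["a short sentence", "a much longer sentence that forces the first one to be padded"],
    padding=True,
    return_tensors="pt",
)

# Without the mask, the new check spots the pad tokens and logs its one-time warning.
model(input_ids=batch["input_ids"])

# With the mask (or with a batch containing no pad tokens), the forward pass stays silent.
model(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"])
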
@@ -74,7 +74,7 @@ from .utils import (
     replace_return_docstrings,
 )
 from .utils.hub import convert_file_size_to_int, get_checkpoint_shard_files
-from .utils.import_utils import ENV_VARS_TRUE_VALUES, is_sagemaker_mp_enabled
+from .utils.import_utils import ENV_VARS_TRUE_VALUES, is_sagemaker_mp_enabled, is_torch_fx_proxy
 from .utils.quantization_config import BitsAndBytesConfig
 from .utils.versions import require_version_core
@@ -3527,6 +3527,11 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix
         """
         Shows a one-time warning if the input_ids appear to contain padding and no attention mask was given.
         """
+        # Skip the check during tracing.
+        if is_torch_fx_proxy(input_ids) or torch.jit.is_tracing():
+            return
+
         if (attention_mask is not None) or (self.config.pad_token_id is None):
             return
...
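
The hunk above shows only a fragment of the helper. A plausible reconstruction of the full method, for orientation: only the lines visible in the diff are verbatim; the pad-token detection step and the warning text below are assumptions.

def warn_if_padding_and_no_attention_mask(self, input_ids, attention_mask):
    """
    Shows a one-time warning if the input_ids appear to contain padding and no attention mask was given.
    """
    # Skip the check during tracing: proxies and traced tensors carry no concrete
    # values, so inspecting them would break (or bake in) the branch.
    if is_torch_fx_proxy(input_ids) or torch.jit.is_tracing():
        return

    if (attention_mask is not None) or (self.config.pad_token_id is None):
        return

    # Assumed detection step: checking only the first and last position of each row
    # is enough to catch left- or right-padded batches cheaply.
    if self.config.pad_token_id in input_ids[:, [-1, 0]]:
        # Assumed message wording; logger.warning_once keeps it to one emission per process.
        logger.warning_once(
            "We strongly recommend passing an `attention_mask` since your input_ids may contain padding."
        )
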
@@ -701,6 +701,7 @@ class AlbertModel(AlbertPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
...
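
The "one-time" behavior promised by the docstring relies on the library's logger.warning_once. A standalone sketch of how such a warn-once helper can be built (illustrative, not the library's exact code):

import functools
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("warn_once_demo")

@functools.lru_cache(maxsize=None)
def warning_once(message: str) -> None:
    # lru_cache memoizes on the message text, so each distinct warning
    # is emitted at most once per process.
    logger.warning(message)

warning_once("We strongly recommend passing an `attention_mask` ...")
warning_once("We strongly recommend passing an `attention_mask` ...")  # cached: emits nothing
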
@@ -1267,6 +1267,7 @@ class AlignTextModel(AlignPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
...
@@ -1305,8 +1305,8 @@ class AltRobertaModel(AltCLIPPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
-            input_shape = input_ids.size()
             self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
+            input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
         else:
...
@@ -966,8 +966,8 @@ class BertModel(BertPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
-            input_shape = input_ids.size()
             self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
+            input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
         else:
...
@@ -768,6 +768,7 @@ class BertGenerationEncoder(BertGenerationPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
...
@@ -2035,6 +2035,7 @@ class BigBirdModel(BigBirdPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
...
@@ -1857,6 +1857,7 @@ class BigBirdPegasusEncoder(BigBirdPegasusPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
             input_ids = input_ids.view(-1, input_shape[-1])
         elif inputs_embeds is not None:
...
@@ -732,6 +732,7 @@ class BlenderbotEncoder(BlenderbotPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
             input_ids = input_ids.view(-1, input_shape[-1])
         elif inputs_embeds is not None:
...
@@ -729,6 +729,7 @@ class BlenderbotSmallEncoder(BlenderbotSmallPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
             input_ids = input_ids.view(-1, input_shape[-1])
         elif inputs_embeds is not None:
...
@@ -717,6 +717,7 @@ class BlipTextModel(BlipTextPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
             batch_size, seq_length = input_shape
             device = input_ids.device
...
@@ -1118,8 +1118,8 @@ class BridgeTowerTextModel(BridgeTowerPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
-            input_shape = input_ids.size()
             self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
+            input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
         else:
...
@@ -841,8 +841,8 @@ class CamembertModel(CamembertPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
-            input_shape = input_ids.size()
             self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
+            input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
         else:
...
@@ -1126,6 +1126,7 @@ class CanineModel(CaninePreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
...
@@ -1208,6 +1208,7 @@ class ChineseCLIPTextModel(ChineseCLIPPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
...
@@ -1853,8 +1853,8 @@ class ClapTextModel(ClapPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
-            input_shape = input_ids.size()
             self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
+            input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
         else:
...
@@ -460,6 +460,7 @@ class CodeGenModel(CodeGenPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
             input_ids = input_ids.view(-1, input_shape[-1])
             batch_size = input_ids.shape[0]
...
@@ -818,6 +818,7 @@ class ConvBertModel(ConvBertPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
...
@@ -397,6 +397,7 @@ class CTRLModel(CTRLPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
+            self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
             input_shape = input_ids.size()
             input_ids = input_ids.view(-1, input_shape[-1])
             batch_size = input_ids.shape[0]
...
@@ -790,8 +790,8 @@ class Data2VecTextModel(Data2VecTextPreTrainedModel):
         if input_ids is not None and inputs_embeds is not None:
             raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
         elif input_ids is not None:
-            input_shape = input_ids.size()
             self.warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
+            input_shape = input_ids.size()
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
         else:
...
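
Finally, the reason for the tracing guard at the top of the helper: the padding check branches on tensor values, which is fine in eager mode but gets silently baked in (with a TracerWarning) under torch.jit.trace and fails outright under torch.fx symbolic tracing. A small sketch of the pattern, assuming a pad token id of 0:

import torch

def forward(input_ids: torch.Tensor) -> torch.Tensor:
    # Mirror of the commit's guard: skip the value-dependent check while tracing.
    if not torch.jit.is_tracing():
        # Value-dependent branch, the same shape of check as the helper above.
        if 0 in input_ids[:, [-1, 0]]:
            print("input_ids appear padded but no attention_mask was given")
    return input_ids + 1

ids = torch.tensor([[5, 6, 0, 0]])
forward(ids)                            # eager call: the check runs and warns
traced = torch.jit.trace(forward, ids)  # traced call: the check is skipped entirely
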