Unverified commit cec3cdda authored by Patrick von Platen, committed by GitHub

Fix input ids can be none attn mask (#3345)

* fix issue #3289

* fix attention mask behavior when input_ids is None
parent f6d813aa
@@ -330,8 +330,10 @@ class CTRLModel(CTRLPreTrainedModel):
         elif input_ids is not None:
             input_shape = input_ids.size()
             input_ids = input_ids.view(-1, input_shape[-1])
+            batch_size = input_ids.shape[0]
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
+            batch_size = inputs_embeds.shape[0]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")
@@ -347,7 +349,8 @@ class CTRLModel(CTRLPreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            attention_mask = attention_mask.view(-1, input_shape[-1])
+            assert batch_size > 0, "batch_size has to be defined and > 0"
+            attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
             # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
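For readers skimming the diff, here is a minimal standalone sketch of the resolution logic the two hunks above implement: batch_size is now derived from whichever of input_ids or inputs_embeds is provided, so the attention mask can be reshaped even when input_ids is None. The helper name resolve_batch_and_mask is hypothetical and only illustrates the flow; the real code lives inside CTRLModel.forward.

import torch

def resolve_batch_and_mask(input_ids=None, inputs_embeds=None, attention_mask=None):
    # Derive batch_size from whichever input is given (mirrors the fixed forward pass).
    if input_ids is not None and inputs_embeds is not None:
        raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
    elif input_ids is not None:
        input_shape = input_ids.size()
        input_ids = input_ids.view(-1, input_shape[-1])
        batch_size = input_ids.shape[0]
    elif inputs_embeds is not None:
        input_shape = inputs_embeds.size()[:-1]
        batch_size = inputs_embeds.shape[0]
    else:
        raise ValueError("You have to specify either input_ids or inputs_embeds")

    if attention_mask is not None:
        # The mask is reshaped using batch_size, not input_ids (which may be None).
        assert batch_size > 0, "batch_size has to be defined and > 0"
        attention_mask = attention_mask.view(batch_size, -1)
    return batch_size, attention_mask

# Embeddings-only input, the case that used to break:
embeds = torch.randn(2, 5, 768)
mask = torch.ones(2, 5)
print(resolve_batch_and_mask(inputs_embeds=embeds, attention_mask=mask)[0])  # 2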
@@ -402,8 +402,10 @@ class GPT2Model(GPT2PreTrainedModel):
         elif input_ids is not None:
             input_shape = input_ids.size()
             input_ids = input_ids.view(-1, input_shape[-1])
+            batch_size = input_ids.shape[0]
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
+            batch_size = inputs_embeds.shape[0]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")
@@ -424,7 +426,7 @@ class GPT2Model(GPT2PreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            batch_size = input_ids.shape[0]
+            assert batch_size > 0, "batch_size has to be defined and > 0"
             attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
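As a usage-level illustration (not part of the commit), the scenario this fixes is a forward pass that supplies inputs_embeds and an attention_mask but no input_ids: the removed line read batch_size from input_ids, which fails when input_ids is None. A hedged sketch, assuming the standard gpt2 checkpoint and the tuple-style outputs of this transformers version:

import torch
from transformers import GPT2Model, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2")

input_ids = tokenizer.encode("Hello world", return_tensors="pt")
inputs_embeds = model.wte(input_ids)  # pre-computed token embeddings
attention_mask = torch.ones(inputs_embeds.shape[:-1], dtype=torch.long)

# Before this fix, combining inputs_embeds with attention_mask (and no input_ids)
# errored out because batch_size was taken from input_ids; now it is derived from
# inputs_embeds as well.
last_hidden_state = model(inputs_embeds=inputs_embeds, attention_mask=attention_mask)[0]
print(last_hidden_state.shape)  # e.g. torch.Size([1, 2, 768])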