Unverified commit cec3cdda authored by Patrick von Platen, committed by GitHub

Fix input ids can be none attn mask (#3345)

* fix issue #3289

* fix attention mask behavior when input_ids is None
parent f6d813aa
@@ -330,8 +330,10 @@ class CTRLModel(CTRLPreTrainedModel):
         elif input_ids is not None:
             input_shape = input_ids.size()
             input_ids = input_ids.view(-1, input_shape[-1])
+            batch_size = input_ids.shape[0]
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
+            batch_size = inputs_embeds.shape[0]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")
@@ -347,7 +349,8 @@ class CTRLModel(CTRLPreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            attention_mask = attention_mask.view(-1, input_shape[-1])
+            assert batch_size > 0, "batch_size has to be defined and > 0"
+            attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
             # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
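For readers skimming the diff, here is a minimal standalone sketch of the resolution logic the two hunks above implement: batch_size is now derived from whichever of input_ids or inputs_embeds is provided, so the attention mask can be reshaped even when input_ids is None. The helper name resolve_batch_and_mask is hypothetical and only illustrates the flow; the real code lives inside CTRLModel.forward.

import torch

def resolve_batch_and_mask(input_ids=None, inputs_embeds=None, attention_mask=None):
    # Derive batch_size from whichever input is given (mirrors the fixed forward pass).
    if input_ids is not None and inputs_embeds is not None:
        raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time")
    elif input_ids is not None:
        input_shape = input_ids.size()
        input_ids = input_ids.view(-1, input_shape[-1])
        batch_size = input_ids.shape[0]
    elif inputs_embeds is not None:
        input_shape = inputs_embeds.size()[:-1]
        batch_size = inputs_embeds.shape[0]
    else:
        raise ValueError("You have to specify either input_ids or inputs_embeds")

    if attention_mask is not None:
        # The mask is reshaped using batch_size, not input_ids (which may be None).
        assert batch_size > 0, "batch_size has to be defined and > 0"
        attention_mask = attention_mask.view(batch_size, -1)
    return batch_size, attention_mask

# Embeddings-only input, the case that used to break:
embeds = torch.randn(2, 5, 768)
mask = torch.ones(2, 5)
print(resolve_batch_and_mask(inputs_embeds=embeds, attention_mask=mask)[0])  # 2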
@@ -402,8 +402,10 @@ class GPT2Model(GPT2PreTrainedModel):
         elif input_ids is not None:
             input_shape = input_ids.size()
             input_ids = input_ids.view(-1, input_shape[-1])
+            batch_size = input_ids.shape[0]
         elif inputs_embeds is not None:
             input_shape = inputs_embeds.size()[:-1]
+            batch_size = inputs_embeds.shape[0]
         else:
             raise ValueError("You have to specify either input_ids or inputs_embeds")
@@ -424,7 +426,7 @@ class GPT2Model(GPT2PreTrainedModel):
         # Attention mask.
         if attention_mask is not None:
-            batch_size = input_ids.shape[0]
+            assert batch_size > 0, "batch_size has to be defined and > 0"
             attention_mask = attention_mask.view(batch_size, -1)
             # We create a 3D attention mask from a 2D tensor mask.
             # Sizes are [batch_size, 1, 1, to_seq_length]
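As a usage-level illustration (not part of the commit), the scenario this fixes is a forward pass that supplies inputs_embeds and an attention_mask but no input_ids: the removed line read batch_size from input_ids, which fails when input_ids is None. A hedged sketch, assuming the standard gpt2 checkpoint and the tuple-style outputs of this transformers version:

import torch
from transformers import GPT2Model, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2")

input_ids = tokenizer.encode("Hello world", return_tensors="pt")
inputs_embeds = model.wte(input_ids)  # pre-computed token embeddings
attention_mask = torch.ones(inputs_embeds.shape[:-1], dtype=torch.long)

# Before this fix, combining inputs_embeds with attention_mask (and no input_ids)
# errored out because batch_size was taken from input_ids; now it is derived from
# inputs_embeds as well.
last_hidden_state = model(inputs_embeds=inputs_embeds, attention_mask=attention_mask)[0]
print(last_hidden_state.shape)  # e.g. torch.Size([1, 2, 768])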