Unverified commit 1559c0df, authored by ver217 and committed by GitHub

fix attn mask shape of gpt (#472)

parent 3cb3fc27
@@ -292,7 +292,7 @@ class GPT(nn.Module):
         # So we can broadcast to [batch_size, num_heads, from_seq_length, to_seq_length]
         # Adapted from huggingface
         if attention_mask is not None:
-            batch_size = x.shape[0]
+            batch_size = input_ids.shape[0]
             attention_mask = attention_mask.view(batch_size, -1)
             attention_mask = col_nn.partition_batch(attention_mask)
             attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
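The fix reads batch_size from input_ids instead of from x. A plausible reading (the commit message only says the mask shape was wrong) is that the leading dimension of the hidden states x does not always equal the batch size under ColossalAI's parallel layouts, whereas input_ids does, so attention_mask.view(batch_size, -1) was handed the wrong size. Below is a minimal, self-contained PyTorch sketch of the broadcast that the in-code comment describes; the tensor names and sizes are illustrative assumptions, not ColossalAI code.

import torch

# Illustrative sizes (assumptions, not taken from the commit).
batch_size, num_heads, seq_len = 2, 4, 8

# Padding mask as a tokenizer would produce it: 1 = real token, 0 = padding.
attention_mask = torch.ones(batch_size, seq_len)

# Insert singleton head and query dims: [B, S] -> [B, 1, 1, S], so the mask
# broadcasts against attention scores of shape
# [batch_size, num_heads, from_seq_length, to_seq_length].
mask = attention_mask.view(batch_size, -1).unsqueeze(1).unsqueeze(2)

scores = torch.randn(batch_size, num_heads, seq_len, seq_len)
masked_scores = scores + (1.0 - mask) * -10000.0  # additive mask, HF-style
print(masked_scores.shape)  # torch.Size([2, 4, 8, 8])

If batch_size does not match the mask's true leading dimension, view(batch_size, -1) either raises a RuntimeError or silently yields a mask whose trailing dimension no longer matches to_seq_length, so reading batch_size from the wrong tensor breaks the downstream broadcast.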