cast bool tensor to long for pytorch < 1.3

4d181999 · Rémi Louf · Julien Chaumond · 9f75565e · 4d181999
Commit 4d181999, authored Nov 12, 2019 by Rémi Louf; committed by Julien Chaumond on Dec 09, 2019
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 0 deletions

transformers/modeling_bert.py transformers/modeling_bert.py +1 -0

No files found.
--- a/transformers/modeling_bert.py
+++ b/transformers/modeling_bert.py
@@ -675,6 +675,7 @@ class BertModel(BertPreTrainedModel):
                batch_size, seq_length = input_shape
                seq_ids = torch.arange(seq_length, device=device)
                causal_mask = seq_ids[None, None, :].repeat(batch_size, seq_length, 1) <= seq_ids[None, :, None]
+                causal_mask = causal_mask.to(torch.long)  # not converting to long will cause errors with pytorch version < 1.3
                extended_attention_mask = causal_mask[:, None, :, :] * attention_mask[:, None, None, :]
            else:
                extended_attention_mask = attention_mask[:, None, None, :]