Fix #1223: skip memory padding in XLNetModel when mlen == 0

45de034b · thomwolf · c88f0516 · 45de034b
Commit 45de034b authored Sep 17, 2019 by thomwolf
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 10 additions and 5 deletions

pytorch_transformers/modeling_xlnet.py pytorch_transformers/modeling_xlnet.py +10 -5

No files found.
--- a/pytorch_transformers/modeling_xlnet.py
+++ b/pytorch_transformers/modeling_xlnet.py
@@ -743,6 +743,7 @@ class XLNetModel(XLNetPreTrainedModel):

        if data_mask is not None:
            # all mems can be attended to
+            if mlen > 0:
                mems_mask = torch.zeros([data_mask.shape[0], mlen, bsz]).to(data_mask)
                data_mask = torch.cat([mems_mask, data_mask], dim=1)
            if attn_mask is None:
@@ -755,6 +756,7 @@ class XLNetModel(XLNetPreTrainedModel):

        if attn_mask is not None:
            non_tgt_mask = -torch.eye(qlen).to(attn_mask)
+            if mlen > 0:
                non_tgt_mask = torch.cat([torch.zeros([qlen, mlen]).to(attn_mask), non_tgt_mask], dim=-1)
            non_tgt_mask = ((attn_mask + non_tgt_mask[:, :, None, None]) > 0).to(attn_mask)
        else:
@@ -775,8 +777,11 @@ class XLNetModel(XLNetPreTrainedModel):
        ##### Segment embedding
        if token_type_ids is not None:
            # Convert `token_type_ids` to one-hot `seg_mat`
+            if mlen > 0:
                mem_pad = torch.zeros([mlen, bsz], dtype=torch.long, device=device)
                cat_ids = torch.cat([mem_pad, token_type_ids], dim=0)
+            else:
+                cat_ids = token_type_ids

            # `1` indicates not in the same segment [qlen x klen x bsz]
            seg_mat = (token_type_ids[:, None] != cat_ids[None, :]).long()