Commit 45de034b authored by thomwolf's avatar thomwolf
Browse files

fix #1223

parent c88f0516
......@@ -743,6 +743,7 @@ class XLNetModel(XLNetPreTrainedModel):
if data_mask is not None:
# all mems can be attended to
if mlen > 0:
mems_mask = torch.zeros([data_mask.shape[0], mlen, bsz]).to(data_mask)
data_mask = torch.cat([mems_mask, data_mask], dim=1)
if attn_mask is None:
......@@ -755,6 +756,7 @@ class XLNetModel(XLNetPreTrainedModel):
if attn_mask is not None:
non_tgt_mask = -torch.eye(qlen).to(attn_mask)
if mlen > 0:
non_tgt_mask = torch.cat([torch.zeros([qlen, mlen]).to(attn_mask), non_tgt_mask], dim=-1)
non_tgt_mask = ((attn_mask + non_tgt_mask[:, :, None, None]) > 0).to(attn_mask)
else:
......@@ -775,8 +777,11 @@ class XLNetModel(XLNetPreTrainedModel):
##### Segment embedding
if token_type_ids is not None:
# Convert `token_type_ids` to one-hot `seg_mat`
if mlen > 0:
mem_pad = torch.zeros([mlen, bsz], dtype=torch.long, device=device)
cat_ids = torch.cat([mem_pad, token_type_ids], dim=0)
else:
cat_ids = token_type_ids
# `1` indicates not in the same segment [qlen x klen x bsz]
seg_mat = (token_type_ids[:, None] != cat_ids[None, :]).long()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment