Commit f18ac4c2 authored by patrickvonplaten

Fix sequence length in prepare_inputs_for_generation for XLNet: read input_ids.shape[1] after the dummy token is concatenated, so the permutation mask covers the extended input.

parent 359dc438
@@ -1012,11 +1012,11 @@ class XLNetLMHeadModel(XLNetPreTrainedModel):
         # Add dummy token at the end (no attention on this one)
         effective_batch_size = input_ids.shape[0]
-        sequence_length = input_ids.shape[1]
         dummy_token = torch.zeros((effective_batch_size, 1), dtype=torch.long, device=input_ids.device)
         input_ids = torch.cat([input_ids, dummy_token], dim=1)

         # Build permutation mask so that previous tokens don't see last token
+        sequence_length = input_ids.shape[1]
         perm_mask = torch.zeros(
             (effective_batch_size, sequence_length, sequence_length), dtype=torch.float, device=input_ids.device
         )
...
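Why the one-line move matters: perm_mask is allocated as (effective_batch_size, sequence_length, sequence_length) and, per the comment in the diff, its last column is meant to hide the freshly appended dummy token. If sequence_length is read before the torch.cat, the mask comes out one position short of the extended input_ids. Below is a minimal sketch of the shape logic; the toy values and the perm_mask[:, :, -1] assignment are assumptions for illustration, not the full prepare_inputs_for_generation:

import torch

effective_batch_size, original_length = 2, 5
input_ids = torch.ones((effective_batch_size, original_length), dtype=torch.long)

# Old behaviour: length captured before the dummy token is appended
stale_length = input_ids.shape[1]  # 5

dummy_token = torch.zeros((effective_batch_size, 1), dtype=torch.long)
input_ids = torch.cat([input_ids, dummy_token], dim=1)  # shape is now (2, 6)

# Fixed behaviour: length captured after the concatenation
sequence_length = input_ids.shape[1]  # 6

perm_mask = torch.zeros((effective_batch_size, sequence_length, sequence_length))
perm_mask[:, :, -1] = 1.0  # previous tokens don't see the last (dummy) token

assert perm_mask.shape[1] == input_ids.shape[1]  # 6 == 6, mask matches input
assert stale_length != input_ids.shape[1]        # the old value is off by one

With the stale length the mask would be (2, 5, 5) against a (2, 6) input, so the appended dummy token would sit outside the mask entirely; whether that surfaces as a downstream shape error or as a silently visible dummy token, the prediction for the last position would be wrong.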