Commit 1ebfeb79 authored by Lysandre

Cast to long when masking tokens

parent 9c67196b
@@ -195,6 +195,7 @@ def _rotate_checkpoints(args, checkpoint_prefix="checkpoint", use_mtime=False) -
 def mask_tokens(inputs: torch.Tensor, tokenizer: PreTrainedTokenizer, args) -> Tuple[torch.Tensor, torch.Tensor]:
     """ Prepare masked tokens inputs/labels for masked language modeling: 80% MASK, 10% random, 10% original. """
+    inputs = inputs.clone().type(dtype=torch.long)
     labels = inputs.clone()
     # We sample a few tokens in each sequence for masked-LM training (with probability args.mlm_probability defaults to 0.15 in Bert/RoBERTa)
     probability_matrix = torch.full(labels.shape, args.mlm_probability)
...
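The added cast ensures that inputs holds integer (torch.long) token ids before positions are overwritten with [MASK] or random ids and passed to an embedding layer, which requires LongTensor indices. Below is a minimal, self-contained sketch of the 80%/10%/10% masking scheme the docstring describes, using hypothetical values (mask_token_id = 103, vocab_size = 30522, mlm_probability = 0.15, ignore index -1) rather than the script's tokenizer and args; it is an illustration of the technique, not the script's exact code.

import torch

mlm_probability = 0.15   # assumed default, as in Bert/RoBERTa
mask_token_id = 103      # hypothetical [MASK] id
vocab_size = 30522       # hypothetical vocabulary size

inputs = torch.randint(vocab_size, (2, 8))      # fake batch of token ids
inputs = inputs.clone().type(dtype=torch.long)  # the cast added by this commit
labels = inputs.clone()

# Sample positions to mask with probability mlm_probability.
probability_matrix = torch.full(labels.shape, mlm_probability)
masked_indices = torch.bernoulli(probability_matrix).bool()
labels[~masked_indices] = -1  # unmasked positions are ignored by the loss (assumed ignore index)

# 80% of masked positions are replaced with the [MASK] token id.
indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
inputs[indices_replaced] = mask_token_id

# 10% of masked positions (half of the remaining 20%) get a random token id.
indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() & masked_indices & ~indices_replaced
inputs[indices_random] = torch.randint(vocab_size, labels.shape, dtype=torch.long)[indices_random]

# The remaining 10% keep their original token ids; (inputs, labels) is the MLM training pair.

Because the replacement values (mask_token_id, random ids) are long integers, keeping inputs as torch.long avoids dtype mismatches here and at the embedding lookup, which is the point of the cast.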