Commit 74c936dc authored by Naman Goyal, committed by Facebook Github Bot
Browse files

added shuffle as arg for masked_lm for experimenting with pad efficient batching (#582)

Summary:
Added shuffle as an argument for masked_lm, for experimenting with pad-efficient batching.
Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/582

Reviewed By: jingfeidu

Differential Revision: D15355105

Pulled By: jingfeidu

fbshipit-source-id: 9925271a0bc2f9d283f354d158bd4b5ec8788b39
parent d1d3a581
...@@ -41,6 +41,7 @@ class MaskedLMTask(FairseqTask): ...@@ -41,6 +41,7 @@ class MaskedLMTask(FairseqTask):
help='max number of total tokens over all segments' help='max number of total tokens over all segments'
' per sample for BERT dataset') ' per sample for BERT dataset')
parser.add_argument('--break-mode', default="doc", type=str, help='mode for breaking sentence') parser.add_argument('--break-mode', default="doc", type=str, help='mode for breaking sentence')
parser.add_argument('--shuffle-dataset', default=False)
def __init__(self, args, dictionary): def __init__(self, args, dictionary):
super().__init__(args) super().__init__(args)
...@@ -134,6 +135,6 @@ class MaskedLMTask(FairseqTask): ...@@ -134,6 +135,6 @@ class MaskedLMTask(FairseqTask):
mask_idx=self.dictionary.mask(), mask_idx=self.dictionary.mask(),
classif_token_idx=self.dictionary.cls(), classif_token_idx=self.dictionary.cls(),
sep_token_idx=self.dictionary.sep(), sep_token_idx=self.dictionary.sep(),
shuffle=True, shuffle=self.args.shuffle_dataset,
seed=self.seed, seed=self.seed,
) )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment