"src/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "78c2fdc52ef4dd79fb710605a4876894cafdb492"
Commit 37420855 authored by Ning Dong, committed by Facebook Github Bot
Browse files

Add default noising argument in WordNoiser initialization (#664)

Summary:
Pull Request resolved: https://github.com/pytorch/fairseq/pull/664

Previously, the arguments for noising (dropout_prob for WordDropout and max_shuffle_distance for WordShuffle) could only be passed to noising(), so they could not be customized in NoisingDataset.

This change adds a default argument to each initializer so that the value can be specified at construction.

Reviewed By: liezl200

Differential Revision: D15071632

fbshipit-source-id: 59a9bf5a5e6d03c1e74f1b31c1927e221cb11dfa
parent 9421e978
...@@ -72,10 +72,13 @@ class WordDropout(WordNoising): ...@@ -72,10 +72,13 @@ class WordDropout(WordNoising):
then dropped words will be removed. Otherwise, it will be replaced by the then dropped words will be removed. Otherwise, it will be replaced by the
blank_idx.""" blank_idx."""
def __init__(
    self,
    dictionary,
    default_dropout_prob=0.1,
    bpe_cont_marker="@@",
    bpe_end_marker=None,
):
    """Set up the word-dropout noiser.

    Args:
        dictionary: forwarded unchanged to the parent initializer.
        default_dropout_prob: dropout probability that ``noising()`` falls
            back to when it is called with ``dropout_prob=None``.
        bpe_cont_marker: forwarded unchanged to the parent initializer.
        bpe_end_marker: forwarded unchanged to the parent initializer.
    """
    super().__init__(dictionary, bpe_cont_marker, bpe_end_marker)
    # Stored so the probability can be chosen once at construction time
    # instead of being supplied on every noising() call.
    self.default_dropout_prob = default_dropout_prob
def noising(self, x, lengths, dropout_prob=0.1, blank_idx=None): def noising(self, x, lengths, dropout_prob=None, blank_idx=None):
if dropout_prob is None:
dropout_prob = self.default_dropout_prob
# x: (T x B), lengths: B # x: (T x B), lengths: B
if dropout_prob == 0: if dropout_prob == 0:
return x, lengths return x, lengths
...@@ -143,10 +146,13 @@ class WordDropout(WordNoising): ...@@ -143,10 +146,13 @@ class WordDropout(WordNoising):
class WordShuffle(WordNoising): class WordShuffle(WordNoising):
"""Shuffle words by no more than k positions.""" """Shuffle words by no more than k positions."""
def __init__(self, dictionary, default_max_shuffle_distance=3, bpe_cont_marker="@@", bpe_end_marker=None):
    """Set up the word-shuffle noiser.

    Args:
        dictionary: forwarded unchanged to the parent initializer.
        default_max_shuffle_distance: shuffle distance that ``noising()``
            falls back to when it is called with
            ``max_shuffle_distance=None``.
        bpe_cont_marker: forwarded unchanged to the parent initializer.
        bpe_end_marker: forwarded unchanged to the parent initializer.
    """
    super().__init__(dictionary, bpe_cont_marker, bpe_end_marker)
    # Keep the caller-supplied value. Hard-coding 3 here would silently
    # discard the constructor argument and make the default uncustomizable.
    self.default_max_shuffle_distance = default_max_shuffle_distance
def noising(self, x, lengths, max_shuffle_distance=3): def noising(self, x, lengths, max_shuffle_distance=None):
if max_shuffle_distance is None:
max_shuffle_distance = self.default_max_shuffle_distance
# x: (T x B), lengths: B # x: (T x B), lengths: B
if max_shuffle_distance == 0: if max_shuffle_distance == 0:
return x, lengths return x, lengths
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment