"vscode:/vscode.git/clone" did not exist on "dd9217923f7bb5e7f9e5aa4465a810a0fd65859b"
Commit b7fc043f authored by Sylvain Gugger

Merge branch 'master' of github.com:huggingface/transformers

parents 81a6c7cd 195bfd11
@@ -41,16 +41,28 @@ def prepare_m2m_100_inputs_dict(
     decoder_input_ids,
     attention_mask=None,
     decoder_attention_mask=None,
+    head_mask=None,
+    decoder_head_mask=None,
+    cross_attn_head_mask=None,
 ):
     if attention_mask is None:
         attention_mask = input_ids.ne(config.pad_token_id)
     if decoder_attention_mask is None:
         decoder_attention_mask = decoder_input_ids.ne(config.pad_token_id)
+    if head_mask is None:
+        head_mask = torch.ones(config.encoder_layers, config.encoder_attention_heads, device=torch_device)
+    if decoder_head_mask is None:
+        decoder_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
+    if cross_attn_head_mask is None:
+        cross_attn_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
     return {
         "input_ids": input_ids,
         "decoder_input_ids": decoder_input_ids,
         "attention_mask": attention_mask,
         "decoder_attention_mask": attention_mask,
+        "head_mask": head_mask,
+        "decoder_head_mask": decoder_head_mask,
+        "cross_attn_head_mask": cross_attn_head_mask,
     }
@@ -142,9 +154,10 @@ class M2M100ModelTester:
         model = M2M100Model(config=config).get_decoder().to(torch_device).eval()
         input_ids = inputs_dict["input_ids"]
         attention_mask = inputs_dict["attention_mask"]
+        head_mask = inputs_dict["head_mask"]

         # first forward pass
-        outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
+        outputs = model(input_ids, attention_mask=attention_mask, head_mask=head_mask, use_cache=True)

         output, past_key_values = outputs.to_tuple()
@@ -217,7 +230,6 @@ class M2M100ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
     all_generative_model_classes = (M2M100ForConditionalGeneration,) if is_torch_available() else ()
     is_encoder_decoder = True
     test_pruning = False
-    test_head_masking = False
     test_missing_keys = False

     def setUp(self):
...
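The defaults added above follow the convention the models use at run time: a head mask has one row per layer and one column per head, where 1.0 keeps a head and 0.0 silences it. The sketch below is not part of this commit; it uses a tiny, randomly initialised M2M100 configuration (all sizes invented for illustration) to show the three masks being built and passed through a forward call.

import torch
from transformers import M2M100Config, M2M100Model

# Tiny random model; every size here is made up for the sketch.
config = M2M100Config(
    vocab_size=128,
    d_model=16,
    encoder_layers=2,
    decoder_layers=2,
    encoder_attention_heads=4,
    decoder_attention_heads=4,
    encoder_ffn_dim=32,
    decoder_ffn_dim=32,
)
model = M2M100Model(config).eval()

input_ids = torch.tensor([[2, 5, 6, 7]])
decoder_input_ids = torch.tensor([[2, 5, 6, 7]])

# Shape (num_layers, num_heads): start from the all-ones default and silence one head.
head_mask = torch.ones(config.encoder_layers, config.encoder_attention_heads)
decoder_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads)
cross_attn_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads)
head_mask[0, 0] = 0.0  # drop head 0 of encoder layer 0

outputs = model(
    input_ids=input_ids,
    decoder_input_ids=decoder_input_ids,
    head_mask=head_mask,
    decoder_head_mask=decoder_head_mask,
    cross_attn_head_mask=cross_attn_head_mask,
    output_attentions=True,
)
# The reported attention weights of the silenced head come out as exact zeros.
print(outputs.encoder_attentions[0][:, 0].abs().sum())  # tensor(0.)

Dropping `test_head_masking = False` in the hunk above serves the same goal: the shared head-masking tests in ModelTesterMixin, which are enabled by default, now run for M2M100 as well.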
@@ -60,6 +60,7 @@ def prepare_marian_inputs_dict(
     decoder_attention_mask=None,
     head_mask=None,
     decoder_head_mask=None,
+    cross_attn_head_mask=None,
 ):
     if attention_mask is None:
         attention_mask = input_ids.ne(config.pad_token_id)
@@ -69,6 +70,8 @@ def prepare_marian_inputs_dict(
         head_mask = torch.ones(config.encoder_layers, config.encoder_attention_heads, device=torch_device)
     if decoder_head_mask is None:
         decoder_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
+    if cross_attn_head_mask is None:
+        cross_attn_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
     return {
         "input_ids": input_ids,
         "decoder_input_ids": decoder_input_ids,
@@ -76,6 +79,7 @@ def prepare_marian_inputs_dict(
         "decoder_attention_mask": attention_mask,
         "head_mask": head_mask,
         "decoder_head_mask": decoder_head_mask,
+        "cross_attn_head_mask": cross_attn_head_mask,
     }
...
@@ -52,6 +52,7 @@ def prepare_mbart_inputs_dict(
     decoder_attention_mask=None,
     head_mask=None,
     decoder_head_mask=None,
+    cross_attn_head_mask=None,
 ):
     if attention_mask is None:
         attention_mask = input_ids.ne(config.pad_token_id)
@@ -61,6 +62,8 @@ def prepare_mbart_inputs_dict(
         head_mask = torch.ones(config.encoder_layers, config.encoder_attention_heads, device=torch_device)
     if decoder_head_mask is None:
         decoder_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
+    if cross_attn_head_mask is None:
+        cross_attn_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
     return {
         "input_ids": input_ids,
         "decoder_input_ids": decoder_input_ids,
@@ -68,6 +71,7 @@ def prepare_mbart_inputs_dict(
         "decoder_attention_mask": attention_mask,
         "head_mask": head_mask,
         "decoder_head_mask": decoder_head_mask,
+        "cross_attn_head_mask": cross_attn_head_mask,
     }
...
@@ -42,6 +42,7 @@ def prepare_pegasus_inputs_dict(
     decoder_attention_mask=None,
     head_mask=None,
     decoder_head_mask=None,
+    cross_attn_head_mask=None,
 ):
     if attention_mask is None:
         attention_mask = input_ids.ne(config.pad_token_id)
@@ -51,6 +52,8 @@ def prepare_pegasus_inputs_dict(
         head_mask = torch.ones(config.encoder_layers, config.encoder_attention_heads, device=torch_device)
     if decoder_head_mask is None:
         decoder_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
+    if cross_attn_head_mask is None:
+        cross_attn_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
     return {
         "input_ids": input_ids,
         "decoder_input_ids": decoder_input_ids,
@@ -58,6 +61,7 @@ def prepare_pegasus_inputs_dict(
         "decoder_attention_mask": attention_mask,
         "head_mask": head_mask,
         "decoder_head_mask": decoder_head_mask,
+        "cross_attn_head_mask": cross_attn_head_mask,
     }
...
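The Marian, mBART, and Pegasus hunks above all add the same argument, so one sketch covers them. It is not part of this commit: a tiny, randomly initialised Pegasus model (sizes invented for illustration) with one cross-attention head masked, showing that the mask has shape (decoder_layers, decoder_attention_heads) and that the masked head contributes nothing to the reported cross-attention weights.

import torch
from transformers import PegasusConfig, PegasusModel

# Tiny random model; sizes are invented for the sketch.
config = PegasusConfig(
    vocab_size=64, d_model=16,
    encoder_layers=2, decoder_layers=2,
    encoder_attention_heads=4, decoder_attention_heads=4,
    encoder_ffn_dim=32, decoder_ffn_dim=32,
)
model = PegasusModel(config).eval()

input_ids = torch.tensor([[3, 4, 5, 1]])
decoder_input_ids = torch.tensor([[3, 4, 5, 1]])

# One row per decoder layer, one column per head; 0.0 drops a cross-attention head.
cross_attn_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads)
cross_attn_head_mask[1, 2] = 0.0  # drop head 2 of decoder layer 1

outputs = model(
    input_ids=input_ids,
    decoder_input_ids=decoder_input_ids,
    cross_attn_head_mask=cross_attn_head_mask,
    output_attentions=True,
)
# The dropped head carries zero weight in that layer's cross-attention.
print(outputs.cross_attentions[1][:, 2].abs().sum())  # tensor(0.)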
@@ -55,17 +55,29 @@ def prepare_speech_to_text_inputs_dict(
     decoder_input_ids,
     attention_mask=None,
     decoder_attention_mask=None,
+    head_mask=None,
+    decoder_head_mask=None,
+    cross_attn_head_mask=None,
 ):
     if attention_mask is None:
         attention_mask = input_features.ne(0)
     if decoder_attention_mask is None:
         decoder_attention_mask = decoder_input_ids.ne(config.pad_token_id)
+    if head_mask is None:
+        head_mask = torch.ones(config.encoder_layers, config.encoder_attention_heads, device=torch_device)
+    if decoder_head_mask is None:
+        decoder_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
+    if cross_attn_head_mask is None:
+        cross_attn_head_mask = torch.ones(config.decoder_layers, config.decoder_attention_heads, device=torch_device)
     return {
         # "input_ids": input_features,
         "input_features": input_features,
         "decoder_input_ids": decoder_input_ids,
         "attention_mask": attention_mask,
         "decoder_attention_mask": attention_mask,
+        "head_mask": head_mask,
+        "decoder_head_mask": decoder_head_mask,
+        "cross_attn_head_mask": cross_attn_head_mask,
     }
@@ -247,7 +259,6 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
     all_generative_model_classes = (Speech2TextForConditionalGeneration,) if is_torch_available() else ()
     is_encoder_decoder = True
     test_pruning = False
-    test_head_masking = False
     test_missing_keys = False
     test_torchscript = True
@@ -316,8 +327,8 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
                 "decoder_attention_mask",
             ]
             expected_arg_names.extend(
-                ["head_mask", "decoder_head_mask", "encoder_outputs"]
-                if "head_mask" and "decoder_head_mask" in arg_names
+                ["head_mask", "decoder_head_mask", "cross_attn_head_mask", "encoder_outputs"]
+                if "head_mask" and "decoder_head_mask" and "cross_attn_head_mask" in arg_names
                 else ["encoder_outputs"]
            )
            self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)
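A side note on the condition in the hunk above (an observation, not something this commit changes): non-empty string literals are always truthy, so chaining them with `and` only performs a membership test on the last name. The toy snippet below, with a made-up arg_names list, contrasts that form with a check that tests every name.

# Toy illustration, not from the diff; arg_names here is hypothetical.
arg_names = ["input_ids", "attention_mask", "cross_attn_head_mask"]

# Only the last operand is a membership test; this prints True even though
# "head_mask" is not in arg_names.
print("head_mask" and "decoder_head_mask" and "cross_attn_head_mask" in arg_names)  # True

# A form that actually checks every name:
print(all(name in arg_names for name in ("head_mask", "decoder_head_mask", "cross_attn_head_mask")))  # False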
...
@@ -14,6 +14,7 @@
 # limitations under the License.

+import itertools
 import os
 import unittest
@@ -118,6 +119,29 @@ class XLMRobertaTokenizationTest(TokenizerTesterMixin, unittest.TestCase):
             ],
         )

+    def test_subword_regularization_tokenizer(self):
+        # Subword regularization is only available for the slow tokenizer.
+        tokenizer = XLMRobertaTokenizer(
+            SAMPLE_VOCAB, keep_accents=True, sp_model_kwargs={"enable_sampling": True, "alpha": 0.1, "nbest_size": -1}
+        )
+
+        # Subword regularization augments training data with subword sampling.
+        # This has a random component. We test if the tokenizer generates different
+        # results when subword regularization is enabled.
+        tokens_list = []
+        for _ in range(5):
+            tokens_list.append(tokenizer.tokenize("This is a test for subword regularization."))
+
+        # the list of different pairs of tokens_list
+        combinations = itertools.combinations(tokens_list, 2)
+
+        all_equal = True
+        for combination in combinations:
+            if combination[0] != combination[1]:
+                all_equal = False
+
+        self.assertFalse(all_equal)
+
     @cached_property
     def big_tokenizer(self):
         return XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
...
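For context on what the new test exercises, here is a rough sketch that is not part of the commit. It loads the public xlm-roberta-base slow tokenizer with the same sp_model_kwargs the test passes, which assumes the sp_model_kwargs plumbing introduced alongside this test and network access to download the checkpoint, and prints a few sampled segmentations, which normally differ from call to call.

from transformers import XLMRobertaTokenizer

# enable_sampling turns on SentencePiece's unigram sampling; alpha controls the
# sampling temperature and nbest_size=-1 samples from the full lattice.
tokenizer = XLMRobertaTokenizer.from_pretrained(
    "xlm-roberta-base",
    sp_model_kwargs={"enable_sampling": True, "alpha": 0.1, "nbest_size": -1},
)

# With sampling enabled, repeated calls usually yield different segmentations.
for _ in range(3):
    print(tokenizer.tokenize("This is a test for subword regularization."))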