Commit 1ab25c49 authored by thomwolf
Browse files

Merge branch 'master' into pr/2115

parents df396112 18601c3b
...@@ -18,7 +18,7 @@ from __future__ import print_function ...@@ -18,7 +18,7 @@ from __future__ import print_function
import copy import copy
import sys import sys
import os import os.path
import shutil import shutil
import tempfile import tempfile
import json import json
...@@ -30,7 +30,7 @@ import logging ...@@ -30,7 +30,7 @@ import logging
from transformers import is_torch_available from transformers import is_torch_available
from .utils import require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
if is_torch_available(): if is_torch_available():
import torch import torch
...@@ -58,7 +58,7 @@ else: ...@@ -58,7 +58,7 @@ else:
def _config_zero_init(config): def _config_zero_init(config):
configs_no_init = copy.deepcopy(config) configs_no_init = copy.deepcopy(config)
for key in configs_no_init.__dict__.keys(): for key in configs_no_init.__dict__.keys():
if '_range' in key or '_std' in key: if '_range' in key or '_std' in key or 'initializer_factor' in key:
setattr(configs_no_init, key, 0.0) setattr(configs_no_init, key, 0.0)
return configs_no_init return configs_no_init
...@@ -73,6 +73,7 @@ class CommonTestCases: ...@@ -73,6 +73,7 @@ class CommonTestCases:
test_pruning = True test_pruning = True
test_resize_embeddings = True test_resize_embeddings = True
test_head_masking = True test_head_masking = True
is_encoder_decoder = False
def test_save_load(self): def test_save_load(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...@@ -83,6 +84,8 @@ class CommonTestCases: ...@@ -83,6 +84,8 @@ class CommonTestCases:
model.eval() model.eval()
with torch.no_grad(): with torch.no_grad():
outputs = model(**inputs_dict) outputs = model(**inputs_dict)
out_2 = outputs[0].numpy()
out_2[np.isnan(out_2)] = 0
with TemporaryDirectory() as tmpdirname: with TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname) model.save_pretrained(tmpdirname)
...@@ -93,9 +96,7 @@ class CommonTestCases: ...@@ -93,9 +96,7 @@ class CommonTestCases:
# Make sure we don't have nans # Make sure we don't have nans
out_1 = after_outputs[0].cpu().numpy() out_1 = after_outputs[0].cpu().numpy()
out_2 = outputs[0].cpu().numpy() out_1[np.isnan(out_1)] = 0
out_1 = out_1[~np.isnan(out_1)]
out_2 = out_2[~np.isnan(out_2)]
max_diff = np.amax(np.abs(out_1 - out_2)) max_diff = np.amax(np.abs(out_1 - out_2))
self.assertLessEqual(max_diff, 1e-5) self.assertLessEqual(max_diff, 1e-5)
...@@ -117,20 +118,32 @@ class CommonTestCases: ...@@ -117,20 +118,32 @@ class CommonTestCases:
model = model_class(config) model = model_class(config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
first, second = model(inputs_dict["input_ids"])[0], model(inputs_dict["input_ids"])[0] with torch.no_grad():
self.assertEqual(first.ne(second).sum().item(), 0) first = model(**inputs_dict)[0]
second = model(**inputs_dict)[0]
out_1 = first.cpu().numpy()
out_2 = second.cpu().numpy()
out_1 = out_1[~np.isnan(out_1)]
out_2 = out_2[~np.isnan(out_2)]
max_diff = np.amax(np.abs(out_1 - out_2))
self.assertLessEqual(max_diff, 1e-5)
def test_attention_outputs(self): def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
decoder_seq_length = self.model_tester.decoder_seq_length if hasattr(self.model_tester, 'decoder_seq_length') else self.model_tester.seq_length
encoder_seq_length = self.model_tester.encoder_seq_length if hasattr(self.model_tester, 'encoder_seq_length') else self.model_tester.seq_length
decoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else decoder_seq_length
encoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else encoder_seq_length
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
config.output_attentions = True config.output_attentions = True
config.output_hidden_states = False config.output_hidden_states = False
model = model_class(config) model = model_class(config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
outputs = model(**inputs_dict) with torch.no_grad():
outputs = model(**inputs_dict)
attentions = outputs[-1] attentions = outputs[-1]
self.assertEqual(model.config.output_attentions, True) self.assertEqual(model.config.output_attentions, True)
self.assertEqual(model.config.output_hidden_states, False) self.assertEqual(model.config.output_hidden_states, False)
...@@ -138,28 +151,42 @@ class CommonTestCases: ...@@ -138,28 +151,42 @@ class CommonTestCases:
self.assertListEqual( self.assertListEqual(
list(attentions[0].shape[-3:]), list(attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, [self.model_tester.num_attention_heads,
self.model_tester.seq_length, encoder_seq_length ,
self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length]) encoder_key_length])
out_len = len(outputs) out_len = len(outputs)
if self.is_encoder_decoder:
self.assertEqual(out_len % 2, 0)
decoder_attentions = outputs[(out_len // 2)-1]
self.assertEqual(model.config.output_attentions, True)
self.assertEqual(model.config.output_hidden_states, False)
self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
self.assertListEqual(
list(decoder_attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads,
decoder_seq_length,
decoder_key_length
])
# Check attention is always last and order is fine # Check attention is always last and order is fine
config.output_attentions = True config.output_attentions = True
config.output_hidden_states = True config.output_hidden_states = True
model = model_class(config) model = model_class(config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
outputs = model(**inputs_dict) with torch.no_grad():
self.assertEqual(out_len+1, len(outputs)) outputs = model(**inputs_dict)
self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
self.assertEqual(model.config.output_attentions, True) self.assertEqual(model.config.output_attentions, True)
self.assertEqual(model.config.output_hidden_states, True) self.assertEqual(model.config.output_hidden_states, True)
attentions = outputs[-1] self_attentions = outputs[-1]
self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
self.assertListEqual( self.assertListEqual(
list(attentions[0].shape[-3:]), list(self_attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, [self.model_tester.num_attention_heads,
self.model_tester.seq_length, encoder_seq_length,
self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length]) encoder_key_length])
def test_torchscript(self): def test_torchscript(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...@@ -191,21 +218,22 @@ class CommonTestCases: ...@@ -191,21 +218,22 @@ class CommonTestCases:
inputs = inputs_dict['input_ids'] # Let's keep only input_ids inputs = inputs_dict['input_ids'] # Let's keep only input_ids
try: try:
torch.jit.trace(model, inputs) traced_gpt2 = torch.jit.trace(model, inputs)
except RuntimeError: except RuntimeError:
self.fail("Couldn't trace module.") self.fail("Couldn't trace module.")
try: with TemporaryDirectory() as tmp_dir_name:
traced_gpt2 = torch.jit.trace(model, inputs) pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt")
torch.jit.save(traced_gpt2, "traced_model.pt")
except RuntimeError:
self.fail("Couldn't save module.")
try: try:
loaded_model = torch.jit.load("traced_model.pt") torch.jit.save(traced_gpt2, pt_file_name)
os.remove("traced_model.pt") except Exception:
except ValueError: self.fail("Couldn't save module.")
self.fail("Couldn't load module.")
try:
loaded_model = torch.jit.load(pt_file_name)
except Exception:
self.fail("Couldn't load module.")
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
...@@ -223,7 +251,6 @@ class CommonTestCases: ...@@ -223,7 +251,6 @@ class CommonTestCases:
self.assertTrue(models_equal) self.assertTrue(models_equal)
def test_headmasking(self): def test_headmasking(self):
if not self.test_head_masking: if not self.test_head_masking:
return return
...@@ -278,7 +305,6 @@ class CommonTestCases: ...@@ -278,7 +305,6 @@ class CommonTestCases:
self.assertNotEqual( self.assertNotEqual(
attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0) attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
def test_head_pruning(self): def test_head_pruning(self):
if not self.test_pruning: if not self.test_pruning:
return return
...@@ -297,7 +323,8 @@ class CommonTestCases: ...@@ -297,7 +323,8 @@ class CommonTestCases:
heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)), heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)),
-1: [0]} -1: [0]}
model.prune_heads(heads_to_prune) model.prune_heads(heads_to_prune)
outputs = model(**inputs_dict) with torch.no_grad():
outputs = model(**inputs_dict)
attentions = outputs[-1] attentions = outputs[-1]
...@@ -326,20 +353,19 @@ class CommonTestCases: ...@@ -326,20 +353,19 @@ class CommonTestCases:
heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)), heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)),
-1: [0]} -1: [0]}
model.prune_heads(heads_to_prune) model.prune_heads(heads_to_prune)
directory = "pruned_model"
if not os.path.exists(directory):
os.makedirs(directory)
model.save_pretrained(directory)
model = model_class.from_pretrained(directory)
model.to(torch_device)
outputs = model(**inputs_dict) with TemporaryDirectory() as temp_dir_name:
model.save_pretrained(temp_dir_name)
model = model_class.from_pretrained(temp_dir_name)
model.to(torch_device)
with torch.no_grad():
outputs = model(**inputs_dict)
attentions = outputs[-1] attentions = outputs[-1]
self.assertEqual(attentions[0].shape[-3], 1) self.assertEqual(attentions[0].shape[-3], 1)
self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads) self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1) self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
shutil.rmtree(directory)
def test_head_pruning_save_load_from_config_init(self): def test_head_pruning_save_load_from_config_init(self):
if not self.test_pruning: if not self.test_pruning:
...@@ -362,7 +388,8 @@ class CommonTestCases: ...@@ -362,7 +388,8 @@ class CommonTestCases:
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
outputs = model(**inputs_dict) with torch.no_grad():
outputs = model(**inputs_dict)
attentions = outputs[-1] attentions = outputs[-1]
self.assertEqual(attentions[0].shape[-3], 1) self.assertEqual(attentions[0].shape[-3], 1)
...@@ -389,7 +416,8 @@ class CommonTestCases: ...@@ -389,7 +416,8 @@ class CommonTestCases:
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
outputs = model(**inputs_dict) with torch.no_grad():
outputs = model(**inputs_dict)
attentions = outputs[-1] attentions = outputs[-1]
self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1) self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)
...@@ -397,16 +425,13 @@ class CommonTestCases: ...@@ -397,16 +425,13 @@ class CommonTestCases:
self.assertEqual(attentions[2].shape[-3], self.model_tester.num_attention_heads) self.assertEqual(attentions[2].shape[-3], self.model_tester.num_attention_heads)
self.assertEqual(attentions[3].shape[-3], self.model_tester.num_attention_heads) self.assertEqual(attentions[3].shape[-3], self.model_tester.num_attention_heads)
directory = "pruned_model" with TemporaryDirectory() as temp_dir_name:
model.save_pretrained(temp_dir_name)
if not os.path.exists(directory): model = model_class.from_pretrained(temp_dir_name)
os.makedirs(directory) model.to(torch_device)
model.save_pretrained(directory)
model = model_class.from_pretrained(directory)
model.to(torch_device)
shutil.rmtree(directory)
outputs = model(**inputs_dict) with torch.no_grad():
outputs = model(**inputs_dict)
attentions = outputs[-1] attentions = outputs[-1]
self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1) self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)
...@@ -417,7 +442,8 @@ class CommonTestCases: ...@@ -417,7 +442,8 @@ class CommonTestCases:
heads_to_prune = {0: [0], 2: [1, 2]} heads_to_prune = {0: [0], 2: [1, 2]}
model.prune_heads(heads_to_prune) model.prune_heads(heads_to_prune)
outputs = model(**inputs_dict) with torch.no_grad():
outputs = model(**inputs_dict)
attentions = outputs[-1] attentions = outputs[-1]
self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads -1) self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads -1)
...@@ -427,7 +453,6 @@ class CommonTestCases: ...@@ -427,7 +453,6 @@ class CommonTestCases:
self.assertDictEqual(model.config.pruned_heads, {0: [0], 1: [1, 2], 2: [1, 2]}) self.assertDictEqual(model.config.pruned_heads, {0: [0], 1: [1, 2], 2: [1, 2]})
def test_hidden_states_output(self): def test_hidden_states_output(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...@@ -437,14 +462,16 @@ class CommonTestCases: ...@@ -437,14 +462,16 @@ class CommonTestCases:
model = model_class(config) model = model_class(config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
outputs = model(**inputs_dict) with torch.no_grad():
outputs = model(**inputs_dict)
hidden_states = outputs[-1] hidden_states = outputs[-1]
self.assertEqual(model.config.output_attentions, False) self.assertEqual(model.config.output_attentions, False)
self.assertEqual(model.config.output_hidden_states, True) self.assertEqual(model.config.output_hidden_states, True)
self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1) self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
self.assertListEqual( self.assertListEqual(
list(hidden_states[0].shape[-2:]), list(hidden_states[0].shape[-2:]),
[self.model_tester.seq_length, self.model_tester.hidden_size]) [self.model_tester.encoder_seq_length if hasattr(self.model_tester, 'encoder_seq_length') else self.model_tester.seq_length,
self.model_tester.hidden_size])
def test_resize_tokens_embeddings(self): def test_resize_tokens_embeddings(self):
original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...@@ -550,8 +577,14 @@ class CommonTestCases: ...@@ -550,8 +577,14 @@ class CommonTestCases:
def test_inputs_embeds(self): def test_inputs_embeds(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
input_ids = inputs_dict["input_ids"] if not self.is_encoder_decoder:
del inputs_dict["input_ids"] input_ids = inputs_dict["input_ids"]
del inputs_dict["input_ids"]
else:
encoder_input_ids = inputs_dict["encoder_input_ids"]
decoder_input_ids = inputs_dict["decoder_input_ids"]
del inputs_dict["encoder_input_ids"]
del inputs_dict["decoder_input_ids"]
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
...@@ -559,9 +592,14 @@ class CommonTestCases: ...@@ -559,9 +592,14 @@ class CommonTestCases:
model.eval() model.eval()
wte = model.get_input_embeddings() wte = model.get_input_embeddings()
inputs_dict["inputs_embeds"] = wte(input_ids) if not self.is_encoder_decoder:
outputs = model(**inputs_dict) inputs_dict["inputs_embeds"] = wte(input_ids)
else:
inputs_dict["encoder_inputs_embeds"] = wte(encoder_input_ids)
inputs_dict["decoder_inputs_embeds"] = wte(decoder_input_ids)
with torch.no_grad():
outputs = model(**inputs_dict)
class GPTModelTester(CommonModelTester): class GPTModelTester(CommonModelTester):
...@@ -633,7 +671,7 @@ class CommonTestCases: ...@@ -633,7 +671,7 @@ class CommonTestCases:
mc_token_ids = ids_tensor([self.batch_size, self.n_choices], self.seq_length) mc_token_ids = ids_tensor([self.batch_size, self.n_choices], self.seq_length)
config = self.config_class( config = self.config_class(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
n_positions=self.n_positions, n_positions=self.n_positions,
n_embd=self.hidden_size, n_embd=self.hidden_size,
n_layer=self.num_hidden_layers, n_layer=self.num_hidden_layers,
...@@ -649,9 +687,10 @@ class CommonTestCases: ...@@ -649,9 +687,10 @@ class CommonTestCases:
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
outputs = model(input_ids, position_ids, token_type_ids) with torch.no_grad():
outputs = model(input_ids, position_ids) outputs = model(input_ids, position_ids, token_type_ids)
outputs = model(input_ids) outputs = model(input_ids, position_ids)
outputs = model(input_ids)
hidden_state = outputs[0] hidden_state = outputs[0]
self.parent.assertListEqual( self.parent.assertListEqual(
...@@ -664,7 +703,8 @@ class CommonTestCases: ...@@ -664,7 +703,8 @@ class CommonTestCases:
model = self.lm_head_model_class(config) model = self.lm_head_model_class(config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
outputs = model(input_ids, position_ids, token_type_ids, lm_labels) with torch.no_grad():
outputs = model(input_ids, position_ids, token_type_ids, lm_labels)
loss, lm_logits = outputs[:2] loss, lm_logits = outputs[:2]
total_voc = self.vocab_size total_voc = self.vocab_size
...@@ -681,7 +721,8 @@ class CommonTestCases: ...@@ -681,7 +721,8 @@ class CommonTestCases:
model = model_class(config) model = model_class(config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
outputs = model(input_ids) with torch.no_grad():
outputs = model(input_ids)
presents = outputs[-1] presents = outputs[-1]
self.parent.assertEqual(self.num_hidden_layers, len(presents)) self.parent.assertEqual(self.num_hidden_layers, len(presents))
self.parent.assertListEqual( self.parent.assertListEqual(
...@@ -694,7 +735,8 @@ class CommonTestCases: ...@@ -694,7 +735,8 @@ class CommonTestCases:
model = self.double_head_model_class(config) model = self.double_head_model_class(config)
model.to(torch_device) model.to(torch_device)
model.eval() model.eval()
outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels, with torch.no_grad():
outputs = model(input_ids, mc_token_ids, lm_labels=lm_labels, mc_labels=mc_labels,
token_type_ids=token_type_ids, position_ids=position_ids) token_type_ids=token_type_ids, position_ids=position_ids)
lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4] lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
loss = [lm_loss, mc_loss] loss = [lm_loss, mc_loss]
...@@ -711,10 +753,8 @@ class CommonTestCases: ...@@ -711,10 +753,8 @@ class CommonTestCases:
[[], []]) [[], []])
def create_and_check_model_from_pretrained(self): def create_and_check_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(self.base_model_class.pretrained_model_archive_map.keys())[:1]: for model_name in list(self.base_model_class.pretrained_model_archive_map.keys())[:1]:
model = self.base_model_class.from_pretrained(model_name, cache_dir=cache_dir) model = self.base_model_class.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.parent.assertIsNotNone(model) self.parent.assertIsNotNone(model)
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
......
...@@ -16,7 +16,6 @@ from __future__ import division ...@@ -16,7 +16,6 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
import pdb import pdb
from transformers import is_torch_available from transformers import is_torch_available
...@@ -27,7 +26,7 @@ if is_torch_available(): ...@@ -27,7 +26,7 @@ if is_torch_available():
from .modeling_common_test import (CommonTestCases, ids_tensor) from .modeling_common_test import (CommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
@require_torch @require_torch
...@@ -114,7 +113,7 @@ class CTRLModelTest(CommonTestCases.CommonModelTester): ...@@ -114,7 +113,7 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = CTRLConfig( config = CTRLConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
n_embd=self.hidden_size, n_embd=self.hidden_size,
n_layer=self.num_hidden_layers, n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads, n_head=self.num_attention_heads,
...@@ -205,10 +204,8 @@ class CTRLModelTest(CommonTestCases.CommonModelTester): ...@@ -205,10 +204,8 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = CTRLModel.from_pretrained(model_name, cache_dir=cache_dir) model = CTRLModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
......
...@@ -27,7 +27,7 @@ if is_torch_available(): ...@@ -27,7 +27,7 @@ if is_torch_available():
from .modeling_common_test import (CommonTestCases, ids_tensor) from .modeling_common_test import (CommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
@require_torch @require_torch
...@@ -105,7 +105,7 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester): ...@@ -105,7 +105,7 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = DistilBertConfig( config = DistilBertConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
dim=self.hidden_size, dim=self.hidden_size,
n_layers=self.num_hidden_layers, n_layers=self.num_hidden_layers,
n_heads=self.num_attention_heads, n_heads=self.num_attention_heads,
...@@ -235,10 +235,8 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester): ...@@ -235,10 +235,8 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
# @slow # @slow
# def test_model_from_pretrained(self): # def test_model_from_pretrained(self):
# cache_dir = "/tmp/transformers_test/"
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# model = DistilBertModel.from_pretrained(model_name, cache_dir=cache_dir) # model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
# shutil.rmtree(cache_dir)
# self.assertIsNotNone(model) # self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -17,7 +17,6 @@ from __future__ import division ...@@ -17,7 +17,6 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
from transformers import is_torch_available from transformers import is_torch_available
...@@ -27,7 +26,7 @@ if is_torch_available(): ...@@ -27,7 +26,7 @@ if is_torch_available():
from .modeling_common_test import (CommonTestCases, ids_tensor) from .modeling_common_test import (CommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
@require_torch @require_torch
...@@ -110,7 +109,7 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester): ...@@ -110,7 +109,7 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = GPT2Config( config = GPT2Config(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
n_embd=self.hidden_size, n_embd=self.hidden_size,
n_layer=self.num_hidden_layers, n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads, n_head=self.num_attention_heads,
...@@ -239,10 +238,8 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester): ...@@ -239,10 +238,8 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = GPT2Model.from_pretrained(model_name, cache_dir=cache_dir) model = GPT2Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
......
...@@ -17,7 +17,6 @@ from __future__ import division ...@@ -17,7 +17,6 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
from transformers import is_torch_available from transformers import is_torch_available
...@@ -27,7 +26,7 @@ if is_torch_available(): ...@@ -27,7 +26,7 @@ if is_torch_available():
from .modeling_common_test import (CommonTestCases, ids_tensor) from .modeling_common_test import (CommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
@require_torch @require_torch
...@@ -98,7 +97,7 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester): ...@@ -98,7 +97,7 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = OpenAIGPTConfig( config = OpenAIGPTConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
n_embd=self.hidden_size, n_embd=self.hidden_size,
n_layer=self.num_hidden_layers, n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads, n_head=self.num_attention_heads,
...@@ -207,10 +206,8 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester): ...@@ -207,10 +206,8 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = OpenAIGPTModel.from_pretrained(model_name, cache_dir=cache_dir) model = OpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
......
...@@ -17,7 +17,6 @@ from __future__ import division ...@@ -17,7 +17,6 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
from transformers import is_torch_available from transformers import is_torch_available
...@@ -25,11 +24,12 @@ if is_torch_available(): ...@@ -25,11 +24,12 @@ if is_torch_available():
import torch import torch
from transformers import (RobertaConfig, RobertaModel, RobertaForMaskedLM, from transformers import (RobertaConfig, RobertaModel, RobertaForMaskedLM,
RobertaForSequenceClassification, RobertaForTokenClassification) RobertaForSequenceClassification, RobertaForTokenClassification)
from transformers.modeling_roberta import RobertaEmbeddings
from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_common_test import (CommonTestCases, ids_tensor) from .modeling_common_test import (CommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_torch, slow, torch_device from .utils import CACHE_DIR, require_torch, slow, torch_device
@require_torch @require_torch
...@@ -106,7 +106,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): ...@@ -106,7 +106,7 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = RobertaConfig( config = RobertaConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
hidden_size=self.hidden_size, hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers, num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads, num_attention_heads=self.num_attention_heads,
...@@ -199,12 +199,61 @@ class RobertaModelTest(CommonTestCases.CommonModelTester): ...@@ -199,12 +199,61 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = RobertaModel.from_pretrained(model_name, cache_dir=cache_dir) model = RobertaModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
def test_create_position_ids_respects_padding_index(self):
    """Check that position ids built from input ids respect the padding index.

    Regression test for https://github.com/huggingface/transformers/issues/1761

    The three leading token ids are expected to receive sequential position
    ids starting at ``RobertaEmbeddings.padding_idx + 1``, while the trailing
    padding token is expected to keep ``padding_idx`` as its position id.
    """
    config = self.model_tester.prepare_config_and_inputs()[0]
    embeddings = RobertaEmbeddings(config=config)
    pad = embeddings.padding_idx

    # Three token ids followed by the padding index itself.
    input_ids = torch.as_tensor([[12, 31, 13, pad]])
    expected_row = [offset + pad + 1 for offset in range(3)] + [pad]
    expected_positions = torch.as_tensor([expected_row])

    position_ids = embeddings.create_position_ids_from_input_ids(input_ids)

    self.assertEqual(position_ids.shape, expected_positions.shape)
    self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))
def test_create_position_ids_from_inputs_embeds(self):
    """ Check the default position ids built from `inputs_embeds` (no token ids available).
        This is a regression test for
        https://github.com/huggingface/transformers/issues/1761

        Every position of every sequence is expected to be numbered sequentially, starting at
        RobertaEmbeddings.padding_idx + 1.
    """
    config_and_inputs = self.model_tester.prepare_config_and_inputs()
    embeddings = RobertaEmbeddings(config=config_and_inputs[0])

    # Uninitialised (batch=2, seq=4, dim=30) tensor; the expected ids below depend only on
    # its leading two dimensions.
    inputs_embeds = torch.Tensor(2, 4, 30)

    first_position = embeddings.padding_idx + 1
    row = [first_position + offset for offset in range(4)]
    expected_positions = torch.as_tensor([row, row])

    position_ids = embeddings.create_position_ids_from_inputs_embeds(inputs_embeds)

    self.assertEqual(position_ids.shape, expected_positions.shape)
    self.assertTrue(torch.all(position_ids.eq(expected_positions)))
class RobertaModelIntegrationTest(unittest.TestCase): class RobertaModelIntegrationTest(unittest.TestCase):
......
# coding=utf-8
# Copyright 2018 Google T5 Authors and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
from transformers import is_torch_available
from .modeling_common_test import (CommonTestCases, ids_tensor, floats_tensor)
from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_torch, slow, torch_device
if is_torch_available():
from transformers import (T5Config, T5Model, T5WithLMHeadModel)
from transformers.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP
@require_torch
class T5ModelTest(CommonTestCases.CommonModelTester):
    """ Tests for the PyTorch T5 models: model-specific shape checks plus whatever the
        inherited common test-suite runs on `all_model_classes`.
    """

    all_model_classes = (T5Model, T5WithLMHeadModel) if is_torch_available() else ()
    # Class-level switches overriding the defaults declared on the common test case.
    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False
    is_encoder_decoder = True

    class T5ModelTester(object):
        """ Helper that builds a tiny T5 configuration together with random inputs and
            checks the output shapes of the models; assertions go through `parent`.
        """

        def __init__(self,
                     parent,
                     batch_size=13,
                     encoder_seq_length=7,
                     decoder_seq_length=9,
                     is_training=True,
                     use_attention_mask=True,
                     use_labels=True,
                     vocab_size=99,
                     n_positions=14,
                     hidden_size=32,
                     num_hidden_layers=5,
                     num_attention_heads=4,
                     d_ff=37,
                     relative_attention_num_buckets=8,
                     dropout_rate=0.1,
                     initializer_factor=0.002,
                     scope=None,
                     ):
            # Test case whose assert* methods are used by the checks below.
            self.parent = parent

            # Input sizes.
            self.batch_size = batch_size
            self.encoder_seq_length = encoder_seq_length
            self.decoder_seq_length = decoder_seq_length

            # Which optional inputs get generated.
            self.is_training = is_training
            self.use_attention_mask = use_attention_mask
            self.use_labels = use_labels

            # Model hyper-parameters forwarded to T5Config.
            self.vocab_size = vocab_size
            self.n_positions = n_positions
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.d_ff = d_ff
            self.relative_attention_num_buckets = relative_attention_num_buckets
            self.dropout_rate = dropout_rate
            self.initializer_factor = initializer_factor

            self.scope = scope

        def prepare_config_and_inputs(self):
            """ Return `(config, encoder_input_ids, decoder_input_ids, encoder_attention_mask,
                decoder_attention_mask, decoder_lm_labels)`; masks and labels are None when
                the corresponding `use_*` flag is off.
            """
            encoder_input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
            decoder_input_ids = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

            if self.use_attention_mask:
                # vocab_size=2 yields random 0/1 masks.
                encoder_attention_mask = ids_tensor([self.batch_size, self.encoder_seq_length], vocab_size=2)
                decoder_attention_mask = ids_tensor([self.batch_size, self.decoder_seq_length], vocab_size=2)
            else:
                encoder_attention_mask = None
                decoder_attention_mask = None

            if self.use_labels:
                decoder_lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)
            else:
                decoder_lm_labels = None

            config = T5Config(
                vocab_size=self.vocab_size,
                n_positions=self.n_positions,
                d_model=self.hidden_size,
                d_ff=self.d_ff,
                d_kv=self.hidden_size // self.num_attention_heads,
                num_layers=self.num_hidden_layers,
                num_heads=self.num_attention_heads,
                relative_attention_num_buckets=self.relative_attention_num_buckets,
                dropout_rate=self.dropout_rate,
                initializer_factor=self.initializer_factor)

            return (config, encoder_input_ids, decoder_input_ids,
                    encoder_attention_mask, decoder_attention_mask, decoder_lm_labels)

        def check_loss_output(self, result):
            """ Assert that `result["loss"]` is a scalar (empty size). """
            loss_shape = list(result["loss"].size())
            self.parent.assertListEqual(loss_shape, [])

        def create_and_check_t5_model(self, config, encoder_input_ids, decoder_input_ids, encoder_attention_mask, decoder_attention_mask, decoder_lm_labels):
            """ Run T5Model with and without attention masks and check the shapes of the
                decoder and encoder outputs of the mask-free call.
            """
            model = T5Model(config=config)
            model.eval()

            # First pass with both masks; its outputs are overwritten by the second pass.
            decoder_output, encoder_output = model(encoder_input_ids=encoder_input_ids,
                                                   decoder_input_ids=decoder_input_ids,
                                                   encoder_attention_mask=encoder_attention_mask,
                                                   decoder_attention_mask=decoder_attention_mask)
            # Second pass with token ids only.
            decoder_output, encoder_output = model(encoder_input_ids=encoder_input_ids,
                                                   decoder_input_ids=decoder_input_ids)

            expected_encoder_shape = [self.batch_size, self.encoder_seq_length, self.hidden_size]
            expected_decoder_shape = [self.batch_size, self.decoder_seq_length, self.hidden_size]
            self.parent.assertListEqual(list(encoder_output.size()), expected_encoder_shape)
            self.parent.assertListEqual(list(decoder_output.size()), expected_decoder_shape)

        def create_and_check_t5_with_lm_head(self, config, encoder_input_ids, decoder_input_ids, encoder_attention_mask, decoder_attention_mask, decoder_lm_labels):
            """ Run T5WithLMHeadModel with labels and check the prediction-score shape and
                that the loss is a scalar.
            """
            model = T5WithLMHeadModel(config=config)
            model.eval()

            outputs = model(encoder_input_ids=encoder_input_ids,
                            decoder_input_ids=decoder_input_ids,
                            decoder_attention_mask=decoder_attention_mask,
                            decoder_lm_labels=decoder_lm_labels)
            loss = outputs[0]
            prediction_scores = outputs[1]

            self.parent.assertListEqual(
                list(prediction_scores.size()),
                [self.batch_size, self.decoder_seq_length, self.vocab_size])
            self.check_loss_output({"loss": loss, "prediction_scores": prediction_scores})

        def prepare_config_and_inputs_for_common(self):
            """ Return `(config, inputs_dict)` for the common tests; labels are left out. """
            prepared = self.prepare_config_and_inputs()
            config = prepared[0]
            encoder_input_ids, decoder_input_ids = prepared[1], prepared[2]
            encoder_attention_mask, decoder_attention_mask = prepared[3], prepared[4]

            inputs_dict = {
                'encoder_input_ids': encoder_input_ids,
                'decoder_input_ids': decoder_input_ids,
                'decoder_attention_mask': decoder_attention_mask,
                'encoder_attention_mask': encoder_attention_mask,
            }
            return config, inputs_dict

    def setUp(self):
        """ Create the model tester and the configuration tester used by the tests. """
        self.model_tester = T5ModelTest.T5ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)

    def test_config(self):
        """ Run the shared configuration checks on T5Config. """
        self.config_tester.run_common_tests()

    def test_t5_model(self):
        """ Shape checks for the bare T5Model. """
        prepared = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_model(*prepared)

    def test_with_lm_head(self):
        """ Shape and loss checks for T5WithLMHeadModel. """
        prepared = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_with_lm_head(*prepared)

    @slow
    def test_model_from_pretrained(self):
        """ Load the first checkpoint listed in the archive map into CACHE_DIR. """
        first_checkpoints = list(T5_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]
        for model_name in first_checkpoints:
            model = T5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
if __name__ == "__main__":
unittest.main()
...@@ -17,12 +17,11 @@ from __future__ import division ...@@ -17,12 +17,11 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import AlbertConfig, is_tf_available from transformers import AlbertConfig, is_tf_available
...@@ -118,7 +117,7 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -118,7 +117,7 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = AlbertConfig( config = AlbertConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
hidden_size=self.hidden_size, hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers, num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads, num_attention_heads=self.num_attention_heads,
...@@ -217,12 +216,8 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -217,12 +216,8 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/" for model_name in list(TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# for model_name in list(TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: model = TFAlbertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
for model_name in ['albert-base-uncased']:
model = TFAlbertModel.from_pretrained(
model_name, cache_dir=cache_dir)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
......
...@@ -22,7 +22,7 @@ import logging ...@@ -22,7 +22,7 @@ import logging
from transformers import is_tf_available from transformers import is_tf_available
from .utils import require_tf, slow from .utils import require_tf, slow, SMALL_MODEL_IDENTIFIER
if is_tf_available(): if is_tf_available():
from transformers import (AutoConfig, BertConfig, from transformers import (AutoConfig, BertConfig,
...@@ -46,11 +46,11 @@ class TFAutoModelTest(unittest.TestCase): ...@@ -46,11 +46,11 @@ class TFAutoModelTest(unittest.TestCase):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ['bert-base-uncased']:
config = AutoConfig.from_pretrained(model_name, force_download=True) config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config) self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig) self.assertIsInstance(config, BertConfig)
model = TFAutoModel.from_pretrained(model_name, force_download=True) model = TFAutoModel.from_pretrained(model_name)
self.assertIsNotNone(model) self.assertIsNotNone(model)
self.assertIsInstance(model, TFBertModel) self.assertIsInstance(model, TFBertModel)
...@@ -59,11 +59,11 @@ class TFAutoModelTest(unittest.TestCase): ...@@ -59,11 +59,11 @@ class TFAutoModelTest(unittest.TestCase):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ['bert-base-uncased']:
config = AutoConfig.from_pretrained(model_name, force_download=True) config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config) self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig) self.assertIsInstance(config, BertConfig)
model = TFAutoModelWithLMHead.from_pretrained(model_name, force_download=True) model = TFAutoModelWithLMHead.from_pretrained(model_name)
self.assertIsNotNone(model) self.assertIsNotNone(model)
self.assertIsInstance(model, TFBertForMaskedLM) self.assertIsInstance(model, TFBertForMaskedLM)
...@@ -72,11 +72,11 @@ class TFAutoModelTest(unittest.TestCase): ...@@ -72,11 +72,11 @@ class TFAutoModelTest(unittest.TestCase):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ['bert-base-uncased']:
config = AutoConfig.from_pretrained(model_name, force_download=True) config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config) self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig) self.assertIsInstance(config, BertConfig)
model = TFAutoModelForSequenceClassification.from_pretrained(model_name, force_download=True) model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
self.assertIsNotNone(model) self.assertIsNotNone(model)
self.assertIsInstance(model, TFBertForSequenceClassification) self.assertIsInstance(model, TFBertForSequenceClassification)
...@@ -85,14 +85,19 @@ class TFAutoModelTest(unittest.TestCase): ...@@ -85,14 +85,19 @@ class TFAutoModelTest(unittest.TestCase):
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ['bert-base-uncased']:
config = AutoConfig.from_pretrained(model_name, force_download=True) config = AutoConfig.from_pretrained(model_name)
self.assertIsNotNone(config) self.assertIsNotNone(config)
self.assertIsInstance(config, BertConfig) self.assertIsInstance(config, BertConfig)
model = TFAutoModelForQuestionAnswering.from_pretrained(model_name, force_download=True) model = TFAutoModelForQuestionAnswering.from_pretrained(model_name)
self.assertIsNotNone(model) self.assertIsNotNone(model)
self.assertIsInstance(model, TFBertForQuestionAnswering) self.assertIsInstance(model, TFBertForQuestionAnswering)
def test_from_pretrained_identifier(self):
    """ Loading SMALL_MODEL_IDENTIFIER through TFAutoModelWithLMHead must give a TFBertForMaskedLM. """
    logging.basicConfig(level=logging.INFO)
    loaded = TFAutoModelWithLMHead.from_pretrained(SMALL_MODEL_IDENTIFIER)
    self.assertIsInstance(loaded, TFBertForMaskedLM)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -17,12 +17,11 @@ from __future__ import division ...@@ -17,12 +17,11 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import BertConfig, is_tf_available from transformers import BertConfig, is_tf_available
...@@ -114,7 +113,7 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -114,7 +113,7 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = BertConfig( config = BertConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
hidden_size=self.hidden_size, hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers, num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads, num_attention_heads=self.num_attention_heads,
...@@ -310,11 +309,9 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -310,11 +309,9 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
# for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
for model_name in ['bert-base-uncased']: for model_name in ['bert-base-uncased']:
model = TFBertModel.from_pretrained(model_name, cache_dir=cache_dir) model = TFBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -69,6 +69,7 @@ class TFCommonTestCases: ...@@ -69,6 +69,7 @@ class TFCommonTestCases:
test_torchscript = True test_torchscript = True
test_pruning = True test_pruning = True
test_resize_embeddings = True test_resize_embeddings = True
is_encoder_decoder = False
def test_initialization(self): def test_initialization(self):
pass pass
...@@ -129,8 +130,12 @@ class TFCommonTestCases: ...@@ -129,8 +130,12 @@ class TFCommonTestCases:
for name, key in inputs_dict.items()) for name, key in inputs_dict.items())
with torch.no_grad(): with torch.no_grad():
pto = pt_model(**pt_inputs_dict) pto = pt_model(**pt_inputs_dict)
tfo = tf_model(inputs_dict) tfo = tf_model(inputs_dict, training=False)
max_diff = np.amax(np.abs(tfo[0].numpy() - pto[0].numpy())) tf_hidden_states = tfo[0].numpy()
pt_hidden_states = pto[0].numpy()
tf_hidden_states[np.isnan(tf_hidden_states)] = 0
pt_hidden_states[np.isnan(pt_hidden_states)] = 0
max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
self.assertLessEqual(max_diff, 2e-2) self.assertLessEqual(max_diff, 2e-2)
# Check we can load pt model in tf and vice-versa with checkpoint => model functions # Check we can load pt model in tf and vice-versa with checkpoint => model functions
...@@ -150,13 +155,21 @@ class TFCommonTestCases: ...@@ -150,13 +155,21 @@ class TFCommonTestCases:
with torch.no_grad(): with torch.no_grad():
pto = pt_model(**pt_inputs_dict) pto = pt_model(**pt_inputs_dict)
tfo = tf_model(inputs_dict) tfo = tf_model(inputs_dict)
max_diff = np.amax(np.abs(tfo[0].numpy() - pto[0].numpy())) tfo = tfo[0].numpy()
pto = pto[0].numpy()
tfo[np.isnan(tfo)] = 0
pto[np.isnan(pto)] = 0
max_diff = np.amax(np.abs(tfo - pto))
self.assertLessEqual(max_diff, 2e-2) self.assertLessEqual(max_diff, 2e-2)
def test_compile_tf_model(self): def test_compile_tf_model(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
input_ids = tf.keras.Input(batch_shape=(2, 2000), name='input_ids', dtype='int32') if self.is_encoder_decoder:
input_ids = {'decoder_input_ids': tf.keras.Input(batch_shape=(2, 2000), name='decoder_input_ids', dtype='int32'),
'encoder_input_ids': tf.keras.Input(batch_shape=(2, 2000), name='encoder_input_ids', dtype='int32')}
else:
input_ids = tf.keras.Input(batch_shape=(2, 2000), name='input_ids', dtype='int32')
optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0) optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True) loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy') metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
...@@ -189,7 +202,7 @@ class TFCommonTestCases: ...@@ -189,7 +202,7 @@ class TFCommonTestCases:
outputs_dict = model(inputs_dict) outputs_dict = model(inputs_dict)
inputs_keywords = copy.deepcopy(inputs_dict) inputs_keywords = copy.deepcopy(inputs_dict)
input_ids = inputs_keywords.pop('input_ids') input_ids = inputs_keywords.pop('input_ids' if not self.is_encoder_decoder else 'decoder_input_ids', None)
outputs_keywords = model(input_ids, **inputs_keywords) outputs_keywords = model(input_ids, **inputs_keywords)
output_dict = outputs_dict[0].numpy() output_dict = outputs_dict[0].numpy()
...@@ -200,6 +213,11 @@ class TFCommonTestCases: ...@@ -200,6 +213,11 @@ class TFCommonTestCases:
def test_attention_outputs(self): def test_attention_outputs(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
decoder_seq_length = self.model_tester.decoder_seq_length if hasattr(self.model_tester, 'decoder_seq_length') else self.model_tester.seq_length
encoder_seq_length = self.model_tester.encoder_seq_length if hasattr(self.model_tester, 'encoder_seq_length') else self.model_tester.seq_length
decoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else decoder_seq_length
encoder_key_length = self.model_tester.key_length if hasattr(self.model_tester, 'key_length') else encoder_seq_length
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
config.output_attentions = True config.output_attentions = True
config.output_hidden_states = False config.output_hidden_states = False
...@@ -212,16 +230,28 @@ class TFCommonTestCases: ...@@ -212,16 +230,28 @@ class TFCommonTestCases:
self.assertListEqual( self.assertListEqual(
list(attentions[0].shape[-3:]), list(attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, [self.model_tester.num_attention_heads,
self.model_tester.seq_length, encoder_seq_length,
self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length]) encoder_key_length])
out_len = len(outputs) out_len = len(outputs)
if self.is_encoder_decoder:
self.assertEqual(out_len % 2, 0)
decoder_attentions = outputs[(out_len // 2)-1]
self.assertEqual(model.config.output_attentions, True)
self.assertEqual(model.config.output_hidden_states, False)
self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
self.assertListEqual(
list(decoder_attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads,
decoder_seq_length,
decoder_key_length])
# Check attention is always last and order is fine # Check attention is always last and order is fine
config.output_attentions = True config.output_attentions = True
config.output_hidden_states = True config.output_hidden_states = True
model = model_class(config) model = model_class(config)
outputs = model(inputs_dict) outputs = model(inputs_dict)
self.assertEqual(out_len+1, len(outputs)) self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
self.assertEqual(model.config.output_attentions, True) self.assertEqual(model.config.output_attentions, True)
self.assertEqual(model.config.output_hidden_states, True) self.assertEqual(model.config.output_hidden_states, True)
...@@ -230,8 +260,8 @@ class TFCommonTestCases: ...@@ -230,8 +260,8 @@ class TFCommonTestCases:
self.assertListEqual( self.assertListEqual(
list(attentions[0].shape[-3:]), list(attentions[0].shape[-3:]),
[self.model_tester.num_attention_heads, [self.model_tester.num_attention_heads,
self.model_tester.seq_length, encoder_seq_length,
self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length]) encoder_key_length])
def test_hidden_states_output(self): def test_hidden_states_output(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
...@@ -264,35 +294,53 @@ class TFCommonTestCases: ...@@ -264,35 +294,53 @@ class TFCommonTestCases:
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
first, second = model(inputs_dict, training=False)[0], model(inputs_dict, training=False)[0] first, second = model(inputs_dict, training=False)[0], model(inputs_dict, training=False)[0]
self.assertTrue(tf.math.equal(first, second).numpy().all()) out_1 = first.numpy()
out_2 = second.numpy()
out_1 = out_1[~np.isnan(out_1)]
out_2 = out_2[~np.isnan(out_2)]
max_diff = np.amax(np.abs(out_1 - out_2))
self.assertLessEqual(max_diff, 1e-5)
def _get_embeds(self, wte, input_ids):
    """ Compute input embeddings for `input_ids` with the embedding layer `wte`.

        In our TF models the input embedding layer can take slightly different input forms,
        so a few of them are tried in turn: the ids alone, the ids wrapped in a list, and the
        ids followed by three `None` placeholders. The first call that succeeds wins.

        If every form fails, a dummy tensor of ones is returned instead, shaped
        `input_ids.shape + [embedding_size]` (or `hidden_size` when the model tester has no
        `embedding_size` attribute).

        Args:
            wte: callable embedding layer, invoked as `wte(inputs, mode="embedding")`.
            input_ids: token ids to embed.

        Returns:
            The output of `wte`, or the fallback tensor of ones.
    """
    candidate_inputs = (
        input_ids,
        [input_ids],
        [input_ids, None, None, None],
    )
    for candidate in candidate_inputs:
        try:
            return wte(candidate, mode="embedding")
        except Exception:
            # Deliberately best-effort: this call form is not supported by `wte`, try the
            # next one. Only `Exception` is caught so that KeyboardInterrupt / SystemExit
            # are no longer swallowed as a bare `except:` would.
            continue

    # No call form worked: fall back to a synthetic tensor of ones.
    if hasattr(self.model_tester, "embedding_size"):
        embed_dim = self.model_tester.embedding_size
    else:
        embed_dim = self.model_tester.hidden_size
    return tf.ones(input_ids.shape + [embed_dim], dtype=tf.dtypes.float32)
def test_inputs_embeds(self): def test_inputs_embeds(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
input_ids = inputs_dict["input_ids"] if not self.is_encoder_decoder:
del inputs_dict["input_ids"] input_ids = inputs_dict["input_ids"]
del inputs_dict["input_ids"]
else:
encoder_input_ids = inputs_dict["encoder_input_ids"]
decoder_input_ids = inputs_dict["decoder_input_ids"]
del inputs_dict["encoder_input_ids"]
del inputs_dict["decoder_input_ids"]
for model_class in self.all_model_classes: for model_class in self.all_model_classes:
model = model_class(config) model = model_class(config)
wte = model.get_input_embeddings() wte = model.get_input_embeddings()
try: if not self.is_encoder_decoder:
x = wte(input_ids, mode="embedding") inputs_dict["inputs_embeds"] = self._get_embeds(wte, input_ids)
except: else:
try: inputs_dict["encoder_inputs_embeds"] = self._get_embeds(wte, encoder_input_ids)
x = wte([input_ids], mode="embedding") inputs_dict["decoder_inputs_embeds"] = self._get_embeds(wte, decoder_input_ids)
except:
try:
x = wte([input_ids, None, None, None], mode="embedding")
except:
if hasattr(self.model_tester, "embedding_size"):
x = tf.ones(input_ids.shape + [self.model_tester.embedding_size], dtype=tf.dtypes.float32)
else:
x = tf.ones(input_ids.shape + [self.model_tester.hidden_size], dtype=tf.dtypes.float32)
# ^^ In our TF models, the input_embeddings can take slightly different forms,
# so we try a few of them.
# We used to fall back to just synthetically creating a dummy tensor of ones:
#
inputs_dict["inputs_embeds"] = x
outputs = model(inputs_dict) outputs = model(inputs_dict)
......
...@@ -17,12 +17,11 @@ from __future__ import division ...@@ -17,12 +17,11 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import CTRLConfig, is_tf_available from transformers import CTRLConfig, is_tf_available
...@@ -112,7 +111,7 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -112,7 +111,7 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = CTRLConfig( config = CTRLConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
n_embd=self.hidden_size, n_embd=self.hidden_size,
n_layer=self.num_hidden_layers, n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads, n_head=self.num_attention_heads,
...@@ -189,10 +188,8 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -189,10 +188,8 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = TFCTRLModel.from_pretrained(model_name, cache_dir=cache_dir) model = TFCTRLModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -20,7 +20,7 @@ import unittest ...@@ -20,7 +20,7 @@ import unittest
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import DistilBertConfig, is_tf_available from transformers import DistilBertConfig, is_tf_available
...@@ -107,7 +107,7 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -107,7 +107,7 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = DistilBertConfig( config = DistilBertConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
dim=self.hidden_size, dim=self.hidden_size,
n_layers=self.num_hidden_layers, n_layers=self.num_hidden_layers,
n_heads=self.num_attention_heads, n_heads=self.num_attention_heads,
...@@ -211,10 +211,8 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -211,10 +211,8 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
# @slow # @slow
# def test_model_from_pretrained(self): # def test_model_from_pretrained(self):
# cache_dir = "/tmp/transformers_test/"
# for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: # for model_name in list(DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# model = DistilBertModel.from_pretrained(model_name, cache_dir=cache_dir) # model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
# shutil.rmtree(cache_dir)
# self.assertIsNotNone(model) # self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -17,12 +17,11 @@ from __future__ import division ...@@ -17,12 +17,11 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import GPT2Config, is_tf_available from transformers import GPT2Config, is_tf_available
...@@ -115,7 +114,7 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -115,7 +114,7 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = GPT2Config( config = GPT2Config(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
n_embd=self.hidden_size, n_embd=self.hidden_size,
n_layer=self.num_hidden_layers, n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads, n_head=self.num_attention_heads,
...@@ -220,10 +219,8 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -220,10 +219,8 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = TFGPT2Model.from_pretrained(model_name, cache_dir=cache_dir) model = TFGPT2Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -17,12 +17,11 @@ from __future__ import division ...@@ -17,12 +17,11 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
import sys import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import OpenAIGPTConfig, is_tf_available from transformers import OpenAIGPTConfig, is_tf_available
...@@ -114,7 +113,7 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -114,7 +113,7 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = OpenAIGPTConfig( config = OpenAIGPTConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
n_embd=self.hidden_size, n_embd=self.hidden_size,
n_layer=self.num_hidden_layers, n_layer=self.num_hidden_layers,
n_head=self.num_attention_heads, n_head=self.num_attention_heads,
...@@ -219,10 +218,8 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -219,10 +218,8 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = TFOpenAIGPTModel.from_pretrained(model_name, cache_dir=cache_dir) model = TFOpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -17,11 +17,10 @@ from __future__ import division ...@@ -17,11 +17,10 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import RobertaConfig, is_tf_available from transformers import RobertaConfig, is_tf_available
...@@ -109,7 +108,7 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -109,7 +108,7 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
choice_labels = ids_tensor([self.batch_size], self.num_choices) choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = RobertaConfig( config = RobertaConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
hidden_size=self.hidden_size, hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers, num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads, num_attention_heads=self.num_attention_heads,
...@@ -192,10 +191,8 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -192,10 +191,8 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = TFRobertaModel.from_pretrained(model_name, cache_dir=cache_dir) model = TFRobertaModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
......
# coding=utf-8
# Copyright 2018 Google T5 Authors and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester
from .utils import CACHE_DIR, require_tf, slow
from transformers import T5Config, is_tf_available
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_t5 import (TFT5Model, TFT5WithLMHeadModel,
TF_T5_PRETRAINED_MODEL_ARCHIVE_MAP)
@require_tf
class TFT5ModelTest(TFCommonTestCases.TFCommonModelTester):
    """Tests for the TensorFlow T5 models ``TFT5Model`` and ``TFT5WithLMHeadModel``.

    Model-specific checks live in the nested ``TFT5ModelTester`` helper; the
    ``test_*`` methods below only build inputs and delegate to it.
    """

    # Marks the models under test as encoder-decoder models.
    is_encoder_decoder = True
    # The model classes are only imported when TensorFlow is available, so
    # fall back to an empty tuple otherwise.
    all_model_classes = (TFT5Model, TFT5WithLMHeadModel) if is_tf_available() else ()

    class TFT5ModelTester(object):
        """Builds a small ``T5Config`` plus input tensors and checks output shapes.

        Assertions are reported through ``parent``, the test case that owns
        this helper.
        """

        def __init__(self,
                     parent,
                     batch_size=13,
                     seq_length=7,
                     is_training=True,
                     use_input_mask=True,
                     use_labels=True,
                     vocab_size=99,
                     n_positions=14,
                     hidden_size=32,
                     num_hidden_layers=5,
                     num_attention_heads=4,
                     d_ff=37,
                     relative_attention_num_buckets=8,
                     dropout_rate=0.1,
                     initializer_factor=0.002,
                     scope=None,
                     ):
            """Store the owning test case and the sizes used to build config and inputs."""
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.n_positions = n_positions
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.d_ff = d_ff
            self.relative_attention_num_buckets = relative_attention_num_buckets
            self.dropout_rate = dropout_rate
            self.initializer_factor = initializer_factor
            self.scope = scope

        def prepare_config_and_inputs(self):
            """Return ``(config, input_ids, input_mask, token_labels)``.

            ``input_mask`` is ``None`` when ``use_input_mask`` is false and
            ``token_labels`` is ``None`` when ``use_labels`` is false.
            """
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                # A vocabulary of size 2 restricts the mask values to 0 and 1.
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_labels = None
            if self.use_labels:
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            config = T5Config(
                vocab_size=self.vocab_size,
                n_positions=self.n_positions,
                d_model=self.hidden_size,
                d_ff=self.d_ff,
                # Per-head size: model width split across the attention heads.
                d_kv=self.hidden_size // self.num_attention_heads,
                num_layers=self.num_hidden_layers,
                num_heads=self.num_attention_heads,
                relative_attention_num_buckets=self.relative_attention_num_buckets,
                dropout_rate=self.dropout_rate,
                initializer_factor=self.initializer_factor)

            return (config, input_ids, input_mask, token_labels)

        def _check_model_output_shapes(self, encoder_output, decoder_output):
            """Assert both outputs have shape ``[batch_size, seq_length, hidden_size]``."""
            expected_shape = [self.batch_size, self.seq_length, self.hidden_size]
            result = {
                "encoder_output": encoder_output.numpy(),
                "decoder_output": decoder_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["encoder_output"].shape), expected_shape)
            self.parent.assertListEqual(
                list(result["decoder_output"].shape), expected_shape)

        def create_and_check_t5_model(self, config, input_ids, input_mask, token_labels):
            """Run ``TFT5Model`` with both calling conventions and check output shapes.

            The outputs of each call are verified separately, so a regression
            in the dict-style call is not hidden by the keyword-style call
            that follows it.
            """
            model = TFT5Model(config=config)
            inputs = {'encoder_input_ids': input_ids,
                      'decoder_input_ids': input_ids,
                      'decoder_attention_mask': input_mask}

            # Calling convention 1: every input packed into a single dict.
            encoder_output, decoder_output = model(inputs)
            self._check_model_output_shapes(encoder_output, decoder_output)

            # Calling convention 2: first input positional, the rest as keywords.
            encoder_output, decoder_output = model(input_ids,
                                                   decoder_attention_mask=input_mask,
                                                   encoder_input_ids=input_ids)
            self._check_model_output_shapes(encoder_output, decoder_output)

        def create_and_check_t5_with_lm_head(self, config, input_ids, input_mask, token_labels):
            """Run ``TFT5WithLMHeadModel`` and check the prediction-score shape."""
            model = TFT5WithLMHeadModel(config=config)
            inputs = {'encoder_input_ids': input_ids,
                      'decoder_input_ids': input_ids,
                      'decoder_attention_mask': input_mask}

            # The model is expected to return exactly two outputs; only the
            # first one is inspected here.
            prediction_scores, _ = model(inputs)

            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape),
                [self.batch_size, self.seq_length, self.vocab_size])

        def prepare_config_and_inputs_for_common(self):
            """Return ``(config, inputs_dict)`` with the inputs keyed by argument name."""
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids, input_mask, token_labels) = config_and_inputs
            inputs_dict = {'encoder_input_ids': input_ids,
                           'decoder_input_ids': input_ids,
                           'decoder_attention_mask': input_mask}
            return config, inputs_dict

    def setUp(self):
        """Create the model tester and the configuration tester for each test."""
        self.model_tester = TFT5ModelTest.TFT5ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)

    def test_config(self):
        """Run the shared configuration tests against ``T5Config``."""
        self.config_tester.run_common_tests()

    def test_t5_model(self):
        """Check the base model's output shapes."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_model(*config_and_inputs)

    def test_with_lm_head(self):
        """Check the LM-head model's prediction-score shape."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_with_lm_head(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        """Load the ``t5-small`` pretrained weights, using ``CACHE_DIR`` as the cache."""
        for model_name in ['t5-small']:
            model = TFT5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
...@@ -18,11 +18,10 @@ from __future__ import print_function ...@@ -18,11 +18,10 @@ from __future__ import print_function
import unittest import unittest
import random import random
import shutil
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import TransfoXLConfig, is_tf_available from transformers import TransfoXLConfig, is_tf_available
...@@ -67,7 +66,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -67,7 +66,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
self.mem_len = mem_len self.mem_len = mem_len
self.key_len = seq_length + mem_len self.key_length = seq_length + mem_len
self.clamp_len = clamp_len self.clamp_len = clamp_len
self.is_training = is_training self.is_training = is_training
self.use_labels = use_labels self.use_labels = use_labels
...@@ -92,7 +91,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -92,7 +91,7 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size) lm_labels = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
config = TransfoXLConfig( config = TransfoXLConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
mem_len=self.mem_len, mem_len=self.mem_len,
clamp_len=self.clamp_len, clamp_len=self.clamp_len,
cutoffs=self.cutoffs, cutoffs=self.cutoffs,
...@@ -205,10 +204,8 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -205,10 +204,8 @@ class TFTransfoXLModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(TF_TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = TFTransfoXLModel.from_pretrained(model_name, cache_dir=cache_dir) model = TFTransfoXLModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
......
...@@ -17,7 +17,6 @@ from __future__ import division ...@@ -17,7 +17,6 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
import unittest import unittest
import shutil
from transformers import is_tf_available from transformers import is_tf_available
...@@ -31,7 +30,7 @@ if is_tf_available(): ...@@ -31,7 +30,7 @@ if is_tf_available():
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
@require_tf @require_tf
...@@ -125,7 +124,7 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -125,7 +124,7 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32) is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)
config = XLMConfig( config = XLMConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
n_special=self.n_special, n_special=self.n_special,
emb_dim=self.hidden_size, emb_dim=self.hidden_size,
n_layers=self.num_hidden_layers, n_layers=self.num_hidden_layers,
...@@ -252,10 +251,8 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -252,10 +251,8 @@ class TFXLMModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(TF_XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = XLMModel.from_pretrained(model_name, cache_dir=cache_dir) model = TFXLMModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
......
...@@ -20,7 +20,6 @@ import os ...@@ -20,7 +20,6 @@ import os
import unittest import unittest
import json import json
import random import random
import shutil
from transformers import XLNetConfig, is_tf_available from transformers import XLNetConfig, is_tf_available
...@@ -35,7 +34,7 @@ if is_tf_available(): ...@@ -35,7 +34,7 @@ if is_tf_available():
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .utils import require_tf, slow from .utils import CACHE_DIR, require_tf, slow
@require_tf @require_tf
...@@ -64,7 +63,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -64,7 +63,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
num_attention_heads=4, num_attention_heads=4,
d_inner=128, d_inner=128,
num_hidden_layers=5, num_hidden_layers=5,
max_position_embeddings=10,
type_sequence_label_size=2, type_sequence_label_size=2,
untie_r=True, untie_r=True,
bi_data=False, bi_data=False,
...@@ -88,7 +86,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -88,7 +86,6 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
self.num_attention_heads = num_attention_heads self.num_attention_heads = num_attention_heads
self.d_inner = d_inner self.d_inner = d_inner
self.num_hidden_layers = num_hidden_layers self.num_hidden_layers = num_hidden_layers
self.max_position_embeddings = max_position_embeddings
self.bi_data = bi_data self.bi_data = bi_data
self.untie_r = untie_r self.untie_r = untie_r
self.same_length = same_length self.same_length = same_length
...@@ -122,13 +119,12 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -122,13 +119,12 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32) is_impossible_labels = ids_tensor([self.batch_size], 2, dtype=tf.float32)
config = XLNetConfig( config = XLNetConfig(
vocab_size_or_config_json_file=self.vocab_size, vocab_size=self.vocab_size,
d_model=self.hidden_size, d_model=self.hidden_size,
n_head=self.num_attention_heads, n_head=self.num_attention_heads,
d_inner=self.d_inner, d_inner=self.d_inner,
n_layer=self.num_hidden_layers, n_layer=self.num_hidden_layers,
untie_r=self.untie_r, untie_r=self.untie_r,
max_position_embeddings=self.max_position_embeddings,
mem_len=self.mem_len, mem_len=self.mem_len,
clamp_len=self.clamp_len, clamp_len=self.clamp_len,
same_length=self.same_length, same_length=self.same_length,
...@@ -322,10 +318,8 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -322,10 +318,8 @@ class TFXLNetModelTest(TFCommonTestCases.TFCommonModelTester):
@slow @slow
def test_model_from_pretrained(self): def test_model_from_pretrained(self):
cache_dir = "/tmp/transformers_test/"
for model_name in list(TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]: for model_name in list(TF_XLNET_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = TFXLNetModel.from_pretrained(model_name, cache_dir=cache_dir) model = TFXLNetModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model) self.assertIsNotNone(model)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment