"vscode:/vscode.git/clone" did not exist on "6c4c7be2828b3f2abf95f8e6feeff16e6bd880a6"
Unverified commit 54abc67a, authored by Thomas Wolf and committed by GitHub

Merge pull request #2255 from aaugustin/implement-best-practices

Implement some Python best practices
parents 645713e2 c11b3e29
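Most of the diff below is mechanical style cleanup rather than behavioral change: the three separate `__future__` imports collapse onto one line, imports are grouped and sorted, single-quoted strings become double-quoted, and long signatures and calls are exploded one argument per line with a trailing comma (black-style formatting). A minimal before/after sketch of the pattern, using a hypothetical helper:

# Before: packed signature, single quotes
def build_inputs(self, batch_size=13, seq_length=7, vocab_size=99):
    return {'batch_size': batch_size, 'seq_length': seq_length}

# After: one argument per line with a trailing comma, double quotes;
# future additions to the signature touch only the lines they add
def build_inputs(
    self,
    batch_size=13,
    seq_length=7,
    vocab_size=99,
):
    return {"batch_size": batch_size, "seq_length": seq_length}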
@@ -12,22 +12,25 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import is_torch_available

from .configuration_common_test import ConfigTester
from .modeling_common_test import CommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_torch, slow, torch_device


if is_torch_available():
    from transformers import (
        AlbertConfig,
        AlbertModel,
        AlbertForMaskedLM,
        AlbertForSequenceClassification,
        AlbertForQuestionAnswering,
    )
    from transformers.modeling_albert import ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
@@ -37,33 +40,33 @@ class AlbertModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (AlbertModel, AlbertForMaskedLM) if is_torch_available() else ()

    class AlbertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            embedding_size=16,
            hidden_size=36,
            num_hidden_layers=6,
            num_hidden_groups=6,
            num_attention_heads=6,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -120,16 +123,17 @@ class AlbertModelTest(CommonTestCases.CommonModelTester):
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
                num_hidden_groups=self.num_hidden_groups,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_albert_model(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = AlbertModel(config=config)
            model.to(torch_device)
            model.eval()
@@ -142,66 +146,79 @@ class AlbertModelTest(CommonTestCases.CommonModelTester):
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])

        def create_and_check_albert_for_masked_lm(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = AlbertForMaskedLM(config=config)
            model.to(torch_device)
            model.eval()
            loss, prediction_scores = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
            )
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.check_loss_output(result)

        def create_and_check_albert_for_question_answering(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = AlbertForQuestionAnswering(config=config)
            model.to(torch_device)
            model.eval()
            loss, start_logits, end_logits = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
            )
            result = {
                "loss": loss,
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
            self.check_loss_output(result)

        def create_and_check_albert_for_sequence_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = AlbertForSequenceClassification(config)
            model.to(torch_device)
            model.eval()
            loss, logits = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels
            )
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
            self.check_loss_output(result)

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
            return config, inputs_dict

    def setUp(self):

@@ -233,5 +250,6 @@ class AlbertModelTest(CommonTestCases.CommonModelTester):
            model = AlbertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)


if __name__ == "__main__":
    unittest.main()
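The testers above build all of their inputs with `ids_tensor` from `modeling_common_test`. A minimal sketch of what such a helper can look like — a random integer tensor with values in `[0, vocab_size)` — assuming the real implementation may differ in details:

import random

import torch


def ids_tensor(shape, vocab_size, rng=None, name=None):
    """Creates a random long tensor of the given shape, for use as token ids."""
    if rng is None:
        rng = random.Random()

    total_dims = 1
    for dim in shape:
        total_dims *= dim

    values = [rng.randint(0, vocab_size - 1) for _ in range(total_dims)]
    return torch.tensor(data=values, dtype=torch.long).view(shape).contiguous()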
@@ -12,29 +12,31 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import logging
import unittest

from transformers import is_torch_available

from .utils import SMALL_MODEL_IDENTIFIER, require_torch, slow


if is_torch_available():
    from transformers import (
        AutoConfig,
        BertConfig,
        AutoModel,
        BertModel,
        AutoModelWithLMHead,
        BertForMaskedLM,
        AutoModelForSequenceClassification,
        BertForSequenceClassification,
        AutoModelForQuestionAnswering,
        BertForQuestionAnswering,
    )
    from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP


@require_torch
class AutoModelTest(unittest.TestCase):

@@ -75,7 +77,9 @@ class AutoModelTest(unittest.TestCase):
            self.assertIsInstance(config, BertConfig)

            model = AutoModelForSequenceClassification.from_pretrained(model_name)
            model, loading_info = AutoModelForSequenceClassification.from_pretrained(
                model_name, output_loading_info=True
            )
            self.assertIsNotNone(model)
            self.assertIsInstance(model, BertForSequenceClassification)
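The `output_loading_info=True` flag exercised here makes `from_pretrained` return the model together with a dict recording how checkpoint weights mapped onto the architecture. A short usage sketch (the checkpoint name is illustrative):

from transformers import AutoModelForSequenceClassification

model, loading_info = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased", output_loading_info=True
)
# loading_info holds entries such as "missing_keys" (weights newly initialized,
# e.g. a task head) and "unexpected_keys" (checkpoint weights left unused).
print(loading_info["missing_keys"])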
......
@@ -12,59 +12,75 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import is_torch_available

from .configuration_common_test import ConfigTester
from .modeling_common_test import CommonTestCases, floats_tensor, ids_tensor
from .utils import CACHE_DIR, require_torch, slow, torch_device


if is_torch_available():
    from transformers import (
        BertConfig,
        BertModel,
        BertForMaskedLM,
        BertForNextSentencePrediction,
        BertForPreTraining,
        BertForQuestionAnswering,
        BertForSequenceClassification,
        BertForTokenClassification,
        BertForMultipleChoice,
    )
    from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
@require_torch
class BertModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (
        (
            BertModel,
            BertForMaskedLM,
            BertForNextSentencePrediction,
            BertForPreTraining,
            BertForQuestionAnswering,
            BertForSequenceClassification,
            BertForTokenClassification,
        )
        if is_torch_available()
        else ()
    )
    class BertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -119,25 +135,44 @@ class BertModelTest(CommonTestCases.CommonModelTester):
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                is_decoder=False,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def prepare_config_and_inputs_for_decoder(self):
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = self.prepare_config_and_inputs()

            config.is_decoder = True
            encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size])
            encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            return (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
                encoder_hidden_states,
                encoder_attention_mask,
            )

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_bert_model(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = BertModel(config=config)
            model.to(torch_device)
            model.eval()
@@ -150,16 +185,38 @@ class BertModelTest(CommonTestCases.CommonModelTester):
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])

        def create_and_check_bert_model_as_decoder(
            self,
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            encoder_hidden_states,
            encoder_attention_mask,
        ):
            model = BertModel(config)
            model.to(torch_device)
            model.eval()
            sequence_output, pooled_output = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                encoder_hidden_states=encoder_hidden_states,
                encoder_attention_mask=encoder_attention_mask,
            )
            sequence_output, pooled_output = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                encoder_hidden_states=encoder_hidden_states,
            )
            sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
@@ -167,122 +224,171 @@ class BertModelTest(CommonTestCases.CommonModelTester):
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])

        def create_and_check_bert_for_masked_lm(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = BertForMaskedLM(config=config)
            model.to(torch_device)
            model.eval()
            loss, prediction_scores = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
            )
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.check_loss_output(result)

        def create_and_check_bert_model_for_masked_lm_as_decoder(
            self,
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            encoder_hidden_states,
            encoder_attention_mask,
        ):
            model = BertForMaskedLM(config=config)
            model.to(torch_device)
            model.eval()
            loss, prediction_scores = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                masked_lm_labels=token_labels,
                encoder_hidden_states=encoder_hidden_states,
                encoder_attention_mask=encoder_attention_mask,
            )
            loss, prediction_scores = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                masked_lm_labels=token_labels,
                encoder_hidden_states=encoder_hidden_states,
            )
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.check_loss_output(result)

        def create_and_check_bert_for_next_sequence_prediction(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = BertForNextSentencePrediction(config=config)
            model.to(torch_device)
            model.eval()
            loss, seq_relationship_score = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                next_sentence_label=sequence_labels,
            )
            result = {
                "loss": loss,
                "seq_relationship_score": seq_relationship_score,
            }
            self.parent.assertListEqual(list(result["seq_relationship_score"].size()), [self.batch_size, 2])
            self.check_loss_output(result)

        def create_and_check_bert_for_pretraining(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = BertForPreTraining(config=config)
            model.to(torch_device)
            model.eval()
            loss, prediction_scores, seq_relationship_score = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                masked_lm_labels=token_labels,
                next_sentence_label=sequence_labels,
            )
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
                "seq_relationship_score": seq_relationship_score,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(list(result["seq_relationship_score"].size()), [self.batch_size, 2])
            self.check_loss_output(result)

        def create_and_check_bert_for_question_answering(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = BertForQuestionAnswering(config=config)
            model.to(torch_device)
            model.eval()
            loss, start_logits, end_logits = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
            )
            result = {
                "loss": loss,
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
            self.check_loss_output(result)

        def create_and_check_bert_for_sequence_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = BertForSequenceClassification(config)
            model.to(torch_device)
            model.eval()
            loss, logits = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels
            )
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
            self.check_loss_output(result)

        def create_and_check_bert_for_token_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = BertForTokenClassification(config=config)
            model.to(torch_device)
            model.eval()
            loss, logits = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
            )
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(
                list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
            )
            self.check_loss_output(result)

        def create_and_check_bert_for_multiple_choice(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_choices = self.num_choices
            model = BertForMultipleChoice(config=config)
            model.to(torch_device)
@@ -290,24 +396,31 @@ class BertModelTest(CommonTestCases.CommonModelTester):
            multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            loss, logits = model(
                multiple_choice_inputs_ids,
                attention_mask=multiple_choice_input_mask,
                token_type_ids=multiple_choice_token_type_ids,
                labels=choice_labels,
            )
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_choices])
            self.check_loss_output(result)

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
            return config, inputs_dict

    def setUp(self):
......
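`prepare_config_and_inputs_for_decoder` and the `*_as_decoder` checks above exercise BERT's cross-attention path: with `config.is_decoder = True`, the forward pass also accepts the encoder's hidden states and attention mask. A condensed sketch of that call pattern, mirroring the tester's default shapes (exact config kwargs may vary across library versions):

import torch

from transformers import BertConfig, BertModel

config = BertConfig(vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4)
config.is_decoder = True

model = BertModel(config)
model.eval()

batch_size, seq_length = 13, 7
input_ids = torch.randint(0, config.vocab_size, (batch_size, seq_length))
# Pretend encoder output: one hidden state per source token.
encoder_hidden_states = torch.rand(batch_size, seq_length, config.hidden_size)
encoder_attention_mask = torch.ones(batch_size, seq_length, dtype=torch.long)

with torch.no_grad():
    sequence_output, pooled_output = model(
        input_ids,
        encoder_hidden_states=encoder_hidden_states,
        encoder_attention_mask=encoder_attention_mask,
    )
assert sequence_output.shape == (batch_size, seq_length, config.hidden_size)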
@@ -12,58 +12,64 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import copy
import json
import logging
import os.path
import random
import shutil
import sys
import tempfile
import unittest
import uuid

from transformers import is_torch_available

from .utils import CACHE_DIR, require_torch, slow, torch_device


if is_torch_available():
    import torch
    import numpy as np

    from transformers import (
        AdaptiveEmbedding,
        PretrainedConfig,
        PreTrainedModel,
        BertModel,
        BertConfig,
        BERT_PRETRAINED_MODEL_ARCHIVE_MAP,
    )
if sys.version_info[0] == 2:

    class TemporaryDirectory(object):
        """Context manager for tempfile.mkdtemp() so it's usable with "with" statement."""

        def __enter__(self):
            self.name = tempfile.mkdtemp()
            return self.name

        def __exit__(self, exc_type, exc_value, traceback):
            shutil.rmtree(self.name)


else:
    TemporaryDirectory = tempfile.TemporaryDirectory
    unicode = str
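Both branches expose the same context-manager interface, which the save/reload tests further down use for round-trips through save_pretrained / from_pretrained. On Python 3 it is simply the standard library class:

import tempfile

# The directory and its contents are removed when the block exits.
with tempfile.TemporaryDirectory() as temp_dir_name:
    print("saving checkpoint under", temp_dir_name)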
def _config_zero_init(config):
    configs_no_init = copy.deepcopy(config)
    for key in configs_no_init.__dict__.keys():
        if "_range" in key or "_std" in key or "initializer_factor" in key:
            setattr(configs_no_init, key, 0.0)
    return configs_no_init
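`_config_zero_init` backs the initialization test shown in the next hunk: with every `*_range` / `*_std` / `initializer_factor` entry forced to 0.0, config-driven weight initialization should yield parameters whose mean is exactly 0.0 (or 1.0, e.g. LayerNorm scales), so any other value points at a weight initialized outside the config-controlled path. A sketch of the intended use (assuming a default BertConfig):

from transformers import BertConfig, BertModel

configs_no_init = _config_zero_init(BertConfig())
model = BertModel(configs_no_init)
for name, param in model.named_parameters():
    if param.requires_grad:
        assert param.data.mean().item() in [0.0, 1.0], name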
class CommonTestCases:
    @require_torch
    class CommonModelTester(unittest.TestCase):
@@ -108,8 +114,11 @@ class CommonTestCases:
                model = model_class(config=configs_no_init)
                for name, param in model.named_parameters():
                    if param.requires_grad:
                        self.assertIn(
                            param.data.mean().item(),
                            [0.0, 1.0],
                            msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
                        )
        def test_determinism(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -131,10 +140,22 @@ class CommonTestCases:
        def test_attention_outputs(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

            decoder_seq_length = (
                self.model_tester.decoder_seq_length
                if hasattr(self.model_tester, "decoder_seq_length")
                else self.model_tester.seq_length
            )
            encoder_seq_length = (
                self.model_tester.encoder_seq_length
                if hasattr(self.model_tester, "encoder_seq_length")
                else self.model_tester.seq_length
            )
            decoder_key_length = (
                self.model_tester.key_length if hasattr(self.model_tester, "key_length") else decoder_seq_length
            )
            encoder_key_length = (
                self.model_tester.key_length if hasattr(self.model_tester, "key_length") else encoder_seq_length
            )

            for model_class in self.all_model_classes:
                config.output_attentions = True
@@ -150,23 +171,20 @@ class CommonTestCases:
                self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
                )
                out_len = len(outputs)

                if self.is_encoder_decoder:
                    self.assertEqual(out_len % 2, 0)
                    decoder_attentions = outputs[(out_len // 2) - 1]
                    self.assertEqual(model.config.output_attentions, True)
                    self.assertEqual(model.config.output_hidden_states, False)
                    self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
                    self.assertListEqual(
                        list(decoder_attentions[0].shape[-3:]),
                        [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
                    )

                # Check attention is always last and order is fine
                config.output_attentions = True

@@ -184,9 +202,8 @@ class CommonTestCases:
                self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(self_attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
                )

        def test_torchscript(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -215,7 +232,7 @@ class CommonTestCases:
                model = model_class(config=configs_no_init)
                model.to(torch_device)
                model.eval()
                inputs = inputs_dict["input_ids"]  # Let's keep only input_ids

                try:
                    traced_gpt2 = torch.jit.trace(model, inputs)
@@ -269,12 +286,14 @@ class CommonTestCases:
            # Prepare head_mask
            # Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
            head_mask = torch.ones(
                self.model_tester.num_hidden_layers, self.model_tester.num_attention_heads, device=torch_device
            )
            head_mask[0, 0] = 0
            head_mask[-1, :-1] = 0
            head_mask.requires_grad_(requires_grad=True)
            inputs = inputs_dict.copy()
            inputs["head_mask"] = head_mask

            outputs = model(**inputs)
@@ -289,21 +308,20 @@ class CommonTestCases:
            # Remove Nan
            for t in attentions:
                self.assertLess(
                    torch.sum(torch.isnan(t)), t.numel() / 4
                )  # Check we don't have more than 25% nans (arbitrary)
            attentions = [
                t.masked_fill(torch.isnan(t), 0.0) for t in attentions
            ]  # remove them (the test is less complete)

            self.assertIsNotNone(multihead_outputs)
            self.assertEqual(len(multihead_outputs), self.model_tester.num_hidden_layers)

            self.assertAlmostEqual(attentions[0][..., 0, :, :].flatten().sum().item(), 0.0)
            self.assertNotEqual(attentions[0][..., -1, :, :].flatten().sum().item(), 0.0)
            self.assertNotEqual(attentions[1][..., 0, :, :].flatten().sum().item(), 0.0)
            self.assertAlmostEqual(attentions[-1][..., -2, :, :].flatten().sum().item(), 0.0)
            self.assertNotEqual(attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
        def test_head_pruning(self):
            if not self.test_pruning:

@@ -320,20 +338,16 @@ class CommonTestCases:
                model = model_class(config=config)
                model.to(torch_device)
                model.eval()
                heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)), -1: [0]}
                model.prune_heads(heads_to_prune)
                with torch.no_grad():
                    outputs = model(**inputs_dict)

                attentions = outputs[-1]

                self.assertEqual(attentions[0].shape[-3], 1)
                self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
                self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)

        def test_head_pruning_save_load_from_pretrained(self):
            if not self.test_pruning:
@@ -350,8 +364,7 @@ class CommonTestCases:
                model = model_class(config=config)
                model.to(torch_device)
                model.eval()
                heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)), -1: [0]}
                model.prune_heads(heads_to_prune)

                with TemporaryDirectory() as temp_dir_name:

@@ -366,7 +379,6 @@ class CommonTestCases:
                self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads)
                self.assertEqual(attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)

        def test_head_pruning_save_load_from_config_init(self):
            if not self.test_pruning:
                return
@@ -380,8 +392,7 @@ class CommonTestCases:
                config.output_attentions = True
                config.output_hidden_states = False

                heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)), -1: [0]}
                config.pruned_heads = heads_to_prune

                model = model_class(config=config)
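The pruning tests drive `prune_heads`, which takes a dict mapping a layer index to the list of head indices to drop; a negative layer index counts from the end, as in the `{0: [...], -1: [0]}` dicts above. A minimal sketch (sizes are illustrative):

from transformers import BertConfig, BertModel

config = BertConfig(num_hidden_layers=5, num_attention_heads=4)
model = BertModel(config)

# Drop heads 1-3 in the first layer and head 0 in the last layer,
# matching the shape assertions in the tests above.
model.prune_heads({0: [1, 2, 3], -1: [0]})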
@@ -446,7 +457,7 @@ class CommonTestCases:
                outputs = model(**inputs_dict)
                attentions = outputs[-1]

                self.assertEqual(attentions[0].shape[-3], self.model_tester.num_attention_heads - 1)
                self.assertEqual(attentions[1].shape[-3], self.model_tester.num_attention_heads - 2)
                self.assertEqual(attentions[2].shape[-3], self.model_tester.num_attention_heads - 2)
                self.assertEqual(attentions[3].shape[-3], self.model_tester.num_attention_heads)

@@ -470,8 +481,13 @@ class CommonTestCases:
                self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
                self.assertListEqual(
                    list(hidden_states[0].shape[-2:]),
                    [
                        self.model_tester.encoder_seq_length
                        if hasattr(self.model_tester, "encoder_seq_length")
                        else self.model_tester.seq_length,
                        self.model_tester.hidden_size,
                    ],
                )

        def test_resize_tokens_embeddings(self):
            original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -512,15 +528,10 @@ class CommonTestCases:
            for model_class in self.all_model_classes:
                model = model_class(config)

                self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Embedding, AdaptiveEmbedding))
                model.set_input_embeddings(torch.nn.Embedding(10, 10))

                x = model.get_output_embeddings()
                self.assertTrue(x is None or isinstance(x, torch.nn.Linear))

        def test_tie_model_weights(self):
            if not self.test_torchscript:
@@ -602,30 +613,30 @@ class CommonTestCases:
            outputs = model(**inputs_dict)

    class GPTModelTester(CommonModelTester):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_position_ids=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            n_positions=33,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            n_choices=3,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            scope=None,
            config_class=None,
            base_model_class=None,
            lm_head_model_class=None,
            double_head_model_class=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -676,13 +687,14 @@ class CommonTestCases:
                n_embd=self.hidden_size,
                n_layer=self.num_hidden_layers,
                n_head=self.num_attention_heads,
                initializer_range=self.initializer_range,
            )

            return (config, input_ids, token_type_ids, position_ids, mc_labels, lm_labels, mc_token_ids)

        def create_and_check_base_model(
            self, config, input_ids, token_type_ids, position_ids, mc_labels, lm_labels, mc_token_ids
        ):
            model = self.base_model_class(config)
            model.to(torch_device)
            model.eval()

@@ -694,12 +706,12 @@ class CommonTestCases:
            hidden_state = outputs[0]
            self.parent.assertListEqual(
                list(hidden_state.size()), [self.batch_size, self.n_choices, self.seq_length, self.hidden_size]
            )

        def create_and_check_lm_head(
            self, config, input_ids, token_type_ids, position_ids, mc_labels, lm_labels, mc_token_ids
        ):
            model = self.lm_head_model_class(config)
            model.to(torch_device)
            model.eval()

@@ -709,14 +721,13 @@ class CommonTestCases:
            total_voc = self.vocab_size
            self.parent.assertListEqual(
                list(lm_logits.size()), [self.batch_size, self.n_choices, self.seq_length, total_voc]
            )
            self.parent.assertListEqual(list(loss.size()), [])

        def create_and_check_presents(
            self, config, input_ids, token_type_ids, position_ids, mc_labels, lm_labels, mc_token_ids
        ):
            for model_class in self.all_model_classes:
                model = model_class(config)
                model.to(torch_device)

@@ -727,30 +738,39 @@ class CommonTestCases:
                self.parent.assertEqual(self.num_hidden_layers, len(presents))
                self.parent.assertListEqual(
                    list(presents[0].size()),
                    [
                        2,
                        self.batch_size * self.n_choices,
                        self.num_attention_heads,
                        self.seq_length,
                        self.hidden_size // self.num_attention_heads,
                    ],
                )

        def create_and_check_double_heads(
            self, config, input_ids, token_type_ids, position_ids, mc_labels, lm_labels, mc_token_ids
        ):
            model = self.double_head_model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    input_ids,
                    mc_token_ids,
                    lm_labels=lm_labels,
                    mc_labels=mc_labels,
                    token_type_ids=token_type_ids,
                    position_ids=position_ids,
                )
            lm_loss, mc_loss, lm_logits, mc_logits = outputs[:4]
            loss = [lm_loss, mc_loss]
            total_voc = self.vocab_size
            self.parent.assertListEqual(
                list(lm_logits.size()), [self.batch_size, self.n_choices, self.seq_length, total_voc]
            )
            self.parent.assertListEqual(list(mc_logits.size()), [self.batch_size, self.n_choices])
            self.parent.assertListEqual([list(l.size()) for l in loss], [[], []])

        def create_and_check_model_from_pretrained(self):
            for model_name in list(self.base_model_class.pretrained_model_archive_map.keys())[:1]:

@@ -759,9 +779,8 @@ class CommonTestCases:
        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids, token_type_ids, position_ids, mc_labels, lm_labels, mc_token_ids) = config_and_inputs
            inputs_dict = {"input_ids": input_ids}
            return config, inputs_dict

        def run_common_tests(self, test_presents=False):

@@ -791,10 +810,10 @@ class ConfigTester(object):
    def create_and_test_config_common_properties(self):
        config = self.config_class(**self.inputs_dict)
        self.parent.assertTrue(hasattr(config, "vocab_size"))
        self.parent.assertTrue(hasattr(config, "hidden_size"))
        self.parent.assertTrue(hasattr(config, "num_attention_heads"))
        self.parent.assertTrue(hasattr(config, "num_hidden_layers"))

    def create_and_test_config_to_json_string(self):
        config = self.config_class(**self.inputs_dict)
......
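# The shape checks in the testers above compare torch.Size objects against
# plain Python lists, so a scalar loss is asserted to have the empty shape [].
# A minimal standalone sketch of that idiom (the assert_shape helper is
# illustrative, not part of the PR):
import torch

def assert_shape(tensor, expected):
    # torch.Size subclasses tuple; casting to a list keeps failures readable,
    # and a 0-d scalar yields the empty list
    assert list(tensor.size()) == expected, (list(tensor.size()), expected)

assert_shape(torch.tensor(0.0), [])                       # scalar loss
assert_shape(torch.zeros(13, 4, 7, 99), [13, 4, 7, 99])   # [batch, n_choices, seq, vocab]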
@@ -11,24 +11,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import is_torch_available

from .configuration_common_test import ConfigTester
from .modeling_common_test import CommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_torch, slow, torch_device


if is_torch_available():
    from transformers import CTRLConfig, CTRLModel, CTRL_PRETRAINED_MODEL_ARCHIVE_MAP, CTRLLMHeadModel


@require_torch
class CTRLModelTest(CommonTestCases.CommonModelTester):
@@ -39,32 +36,32 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
    test_head_masking = False

    class CTRLModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
            use_input_mask=True,
            use_labels=True,
            use_mc_token_ids=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -129,12 +126,20 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
            head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

            return (
                config,
                input_ids,
                input_mask,
                head_mask,
                token_type_ids,
                mc_token_ids,
                sequence_labels,
                token_labels,
                choice_labels,
            )

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
            model = CTRLModel(config=config)
@@ -150,8 +155,8 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
                "presents": presents,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertEqual(len(result["presents"]), config.n_layer)

        def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
@@ -161,29 +166,28 @@ class CTRLModelTest(CommonTestCases.CommonModelTester):
            loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)

            result = {"loss": loss, "lm_logits": lm_logits}

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                input_mask,
                head_mask,
                token_type_ids,
                mc_token_ids,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs

            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "head_mask": head_mask}

            return config, inputs_dict
...
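# Every file in this PR converges on the same import layout: __future__ first,
# then the standard library, then the is_torch_available() probe, then the
# relative test helpers, and only then the guarded heavyweight imports. A
# minimal skeleton of that layout (the test case itself is illustrative):
from __future__ import absolute_import, division, print_function

import unittest

from transformers import is_torch_available

if is_torch_available():
    import torch  # imported only when the backend is actually installed


class BackendFlagTest(unittest.TestCase):
    def test_flag_is_boolean(self):
        self.assertIsInstance(is_torch_available(), bool)


if __name__ == "__main__":
    unittest.main()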
@@ -12,60 +12,67 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import is_torch_available

from .configuration_common_test import ConfigTester
from .modeling_common_test import CommonTestCases, ids_tensor
from .utils import require_torch, torch_device


if is_torch_available():
    from transformers import (
        DistilBertConfig,
        DistilBertModel,
        DistilBertForMaskedLM,
        DistilBertForTokenClassification,
        DistilBertForQuestionAnswering,
        DistilBertForSequenceClassification,
    )


@require_torch
class DistilBertModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (
        (DistilBertModel, DistilBertForMaskedLM, DistilBertForQuestionAnswering, DistilBertForSequenceClassification)
        if is_torch_available()
        else None
    )
    test_pruning = True
    test_torchscript = True
    test_resize_embeddings = True
    test_head_masking = True

    class DistilBertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=False,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -114,16 +121,17 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
                dropout=self.hidden_dropout_prob,
                attention_dropout=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_distilbert_model(
            self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = DistilBertModel(config=config)
            model.to(torch_device)
            model.eval()
@@ -134,10 +142,12 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
                "sequence_output": sequence_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )

        def create_and_check_distilbert_for_masked_lm(
            self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = DistilBertForMaskedLM(config=config)
            model.to(torch_device)
            model.eval()
@@ -147,29 +157,31 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.check_loss_output(result)

        def create_and_check_distilbert_for_question_answering(
            self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = DistilBertForQuestionAnswering(config=config)
            model.to(torch_device)
            model.eval()
            loss, start_logits, end_logits = model(
                input_ids, attention_mask=input_mask, start_positions=sequence_labels, end_positions=sequence_labels
            )
            result = {
                "loss": loss,
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            self.parent.assertListEqual(list(result["start_logits"].size()), [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].size()), [self.batch_size, self.seq_length])
            self.check_loss_output(result)

        def create_and_check_distilbert_for_sequence_classification(
            self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = DistilBertForSequenceClassification(config)
            model.to(torch_device)
@@ -179,12 +191,12 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(list(result["logits"].size()), [self.batch_size, self.num_labels])
            self.check_loss_output(result)

        def create_and_check_distilbert_for_token_classification(
            self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = DistilBertForTokenClassification(config=config)
            model.to(torch_device)
@@ -196,14 +208,14 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
                "logits": logits,
            }
            self.parent.assertListEqual(
                list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
            )
            self.check_loss_output(result)

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
            return config, inputs_dict

    def setUp(self):
@@ -239,5 +251,6 @@ class DistilBertModelTest(CommonTestCases.CommonModelTester):
            # model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
            # self.assertIsNotNone(model)


if __name__ == "__main__":
    unittest.main()
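# In the file above, all_model_classes collapses when torch is missing so the
# shared tests are skipped rather than crashing at import time; DistilBert
# uses None as the sentinel where most testers use an empty tuple. A sketch of
# the guard (the module-level variable is illustrative):
from transformers import is_torch_available

if is_torch_available():
    from transformers import DistilBertModel, DistilBertForMaskedLM

# a conditional expression evaluates its condition first, so the class names
# are never touched when torch is absent
all_model_classes = (DistilBertModel, DistilBertForMaskedLM) if is_torch_available() else ()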
@@ -17,8 +17,10 @@ import logging
import unittest

from transformers import is_torch_available

from .utils import require_torch, slow


if is_torch_available():
    from transformers import BertModel, BertForMaskedLM, Model2Model
    from transformers.modeling_bert import BERT_PRETRAINED_MODEL_ARCHIVE_MAP
@@ -39,13 +41,13 @@ class EncoderDecoderModelTest(unittest.TestCase):
    def test_model2model_from_pretrained_not_bert(self):
        logging.basicConfig(level=logging.INFO)
        with self.assertRaises(ValueError):
            _ = Model2Model.from_pretrained("roberta")
        with self.assertRaises(ValueError):
            _ = Model2Model.from_pretrained("distilbert")
        with self.assertRaises(ValueError):
            _ = Model2Model.from_pretrained("does-not-exist")


if __name__ == "__main__":
...
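# The error-path test above leans on assertRaises as a context manager. A
# self-contained sketch of the pattern with a stand-in loader
# (fake_from_pretrained is hypothetical, not a transformers API):
import unittest


class ErrorPathTest(unittest.TestCase):
    def test_bad_identifier_raises(self):
        def fake_from_pretrained(name):
            # stand-in for Model2Model.from_pretrained: reject unknown ids
            if name != "bert-base-uncased":
                raise ValueError("unsupported model: {}".format(name))

        with self.assertRaises(ValueError):
            fake_from_pretrained("does-not-exist")


if __name__ == "__main__":
    unittest.main()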
@@ -12,55 +12,59 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import is_torch_available

from .configuration_common_test import ConfigTester
from .modeling_common_test import CommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_torch, slow, torch_device


if is_torch_available():
    from transformers import (
        GPT2Config,
        GPT2Model,
        GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
        GPT2LMHeadModel,
        GPT2DoubleHeadsModel,
    )


@require_torch
class GPT2ModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()

    class GPT2ModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
            use_input_mask=True,
            use_labels=True,
            use_mc_token_ids=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -125,12 +129,20 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
            head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

            return (
                config,
                input_ids,
                input_mask,
                head_mask,
                token_type_ids,
                mc_token_ids,
                sequence_labels,
                token_labels,
                choice_labels,
            )

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
            model = GPT2Model(config=config)
@@ -146,8 +158,8 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
                "presents": presents,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertEqual(len(result["presents"]), config.n_layer)

        def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
@@ -157,63 +169,58 @@ class GPT2ModelTest(CommonTestCases.CommonModelTester):
            loss, lm_logits, _ = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)

            result = {"loss": loss, "lm_logits": lm_logits}

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )

        def create_and_check_double_lm_head_model(
            self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
        ):
            model = GPT2DoubleHeadsModel(config)
            model.to(torch_device)
            model.eval()

            multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
            multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()

            inputs = {
                "input_ids": multiple_choice_inputs_ids,
                "mc_token_ids": mc_token_ids,
                "attention_mask": multiple_choice_input_mask,
                "token_type_ids": multiple_choice_token_type_ids,
                "lm_labels": multiple_choice_inputs_ids,
            }

            loss, lm_logits, mc_logits, _ = model(**inputs)

            result = {"loss": loss, "lm_logits": lm_logits, "mc_logits": mc_logits}

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["lm_logits"].size()), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(list(result["mc_logits"].size()), [self.batch_size, self.num_choices])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                input_mask,
                head_mask,
                token_type_ids,
                mc_token_ids,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs

            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "head_mask": head_mask}

            return config, inputs_dict
...
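# The double-heads test above builds its multiple-choice batch by repeating
# each [batch, seq] tensor across a new num_choices axis. A standalone sketch
# of that expansion, with sizes mirroring the tester defaults:
import torch

batch_size, num_choices, seq_length, vocab_size = 13, 4, 7, 99
input_ids = torch.randint(0, vocab_size, (batch_size, seq_length))

# unsqueeze adds the choices axis, expand repeats it without copying, and
# contiguous() materializes the layout downstream ops expect
multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, num_choices, -1).contiguous()
assert multiple_choice_inputs_ids.shape == (batch_size, num_choices, seq_length)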
@@ -12,53 +12,59 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import is_torch_available

from .configuration_common_test import ConfigTester
from .modeling_common_test import CommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_torch, slow, torch_device


if is_torch_available():
    from transformers import (
        OpenAIGPTConfig,
        OpenAIGPTModel,
        OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
        OpenAIGPTLMHeadModel,
        OpenAIGPTDoubleHeadsModel,
    )


@require_torch
class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (
        (OpenAIGPTModel, OpenAIGPTLMHeadModel, OpenAIGPTDoubleHeadsModel) if is_torch_available() else ()
    )

    class OpenAIGPTModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -116,9 +122,7 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
            return config, input_ids, head_mask, token_type_ids, sequence_labels, token_labels, choice_labels

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_openai_gpt_model(self, config, input_ids, head_mask, token_type_ids, *args):
            model = OpenAIGPTModel(config=config)
@@ -129,12 +133,10 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
            model(input_ids, token_type_ids=token_type_ids)
            (sequence_output,) = model(input_ids)

            result = {"sequence_output": sequence_output}

            self.parent.assertListEqual(
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )

        def create_and_check_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args):
            model = OpenAIGPTLMHeadModel(config)
@@ -143,17 +145,12 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
            loss, lm_logits = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)

            result = {"loss": loss, "lm_logits": lm_logits}

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )

        def create_and_check_double_lm_head_model(self, config, input_ids, head_mask, token_type_ids, *args):
            model = OpenAIGPTDoubleHeadsModel(config)
@@ -162,26 +159,25 @@ class OpenAIGPTModelTest(CommonTestCases.CommonModelTester):
            loss, lm_logits, mc_logits = model(input_ids, token_type_ids=token_type_ids, lm_labels=input_ids)

            result = {"loss": loss, "lm_logits": lm_logits}

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["lm_logits"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                head_mask,
                token_type_ids,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "head_mask": head_mask}

            return config, inputs_dict
...
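# Two mechanical rules recur in every hunk of this PR: string literals are
# normalized to double quotes, and any call or literal that overflows the line
# limit is wrapped one element per line with a trailing comma. An illustrative
# before/after on a made-up config dict (not taken from the diff):

# before
config = dict(vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37, initializer_range=0.02)

# after
config = dict(
    vocab_size=99,
    hidden_size=32,
    num_hidden_layers=5,
    num_attention_heads=4,
    intermediate_size=37,
    initializer_range=0.02,
)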
@@ -12,25 +12,29 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import is_torch_available

from .configuration_common_test import ConfigTester
from .modeling_common_test import CommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_torch, slow, torch_device


if is_torch_available():
    import torch
    from transformers import (
        RobertaConfig,
        RobertaModel,
        RobertaForMaskedLM,
        RobertaForSequenceClassification,
        RobertaForTokenClassification,
    )
    from transformers.modeling_roberta import RobertaEmbeddings
    from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP


@require_torch
class RobertaModelTest(CommonTestCases.CommonModelTester):
@@ -38,31 +42,31 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
    all_model_classes = (RobertaForMaskedLM, RobertaModel) if is_torch_available() else ()

    class RobertaModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -116,17 +120,17 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_roberta_model(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = RobertaModel(config=config)
            model.to(torch_device)
            model.eval()
@@ -139,47 +143,59 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(list(result["pooled_output"].size()), [self.batch_size, self.hidden_size])

        def create_and_check_roberta_for_masked_lm(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = RobertaForMaskedLM(config=config)
            model.to(torch_device)
            model.eval()
            loss, prediction_scores = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, masked_lm_labels=token_labels
            )
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.check_loss_output(result)

        def create_and_check_roberta_for_token_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = RobertaForTokenClassification(config=config)
            model.to(torch_device)
            model.eval()
            loss, logits = model(
                input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels
            )
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(
                list(result["logits"].size()), [self.batch_size, self.seq_length, self.num_labels]
            )
            self.check_loss_output(result)

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
            return config, inputs_dict

    def setUp(self):
@@ -214,18 +230,12 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
        model = RobertaEmbeddings(config=config)

        input_ids = torch.as_tensor([[12, 31, 13, model.padding_idx]])
        expected_positions = torch.as_tensor(
            [[0 + model.padding_idx + 1, 1 + model.padding_idx + 1, 2 + model.padding_idx + 1, model.padding_idx]]
        )

        position_ids = model.create_position_ids_from_input_ids(input_ids)
        self.assertEqual(position_ids.shape, expected_positions.shape)
        self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))

    def test_create_position_ids_from_inputs_embeds(self):
@@ -247,69 +257,47 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
        ]
        expected_positions = torch.as_tensor([expected_single_positions, expected_single_positions])
        position_ids = embeddings.create_position_ids_from_inputs_embeds(inputs_embeds)
        self.assertEqual(position_ids.shape, expected_positions.shape)
        self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))


class RobertaModelIntegrationTest(unittest.TestCase):
    @slow
    def test_inference_masked_lm(self):
        model = RobertaForMaskedLM.from_pretrained("roberta-base")

        input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
        output = model(input_ids)[0]
        expected_shape = torch.Size((1, 11, 50265))
        self.assertEqual(output.shape, expected_shape)
        # compare the actual values for a slice.
        expected_slice = torch.Tensor(
            [[[33.8843, -4.3107, 22.7779], [4.6533, -2.8099, 13.6252], [1.8222, -3.6898, 8.8600]]]
        )
        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))

    @slow
    def test_inference_no_head(self):
        model = RobertaModel.from_pretrained("roberta-base")

        input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
        output = model(input_ids)[0]
        # compare the actual values for a slice.
        expected_slice = torch.Tensor(
            [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0539, -0.0174], [0.0548, 0.0799, 0.1687]]]
        )
        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))

    @slow
    def test_inference_classification_head(self):
        model = RobertaForSequenceClassification.from_pretrained("roberta-large-mnli")

        input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
        output = model(input_ids)[0]
        expected_shape = torch.Size((1, 3))
        self.assertEqual(output.shape, expected_shape)
        expected_tensor = torch.Tensor([[-0.9469, 0.3913, 0.5118]])
        self.assertTrue(torch.allclose(output, expected_tensor, atol=1e-3))


if __name__ == "__main__":
...
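# The position-id tests above pin down padding-aware numbering: real tokens
# count up from padding_idx + 1 while pad positions keep padding_idx. A sketch
# that reproduces the behaviour the test asserts (it mirrors what
# RobertaEmbeddings.create_position_ids_from_input_ids is expected to do, not
# a copy of the implementation):
import torch


def create_position_ids_from_input_ids(input_ids, padding_idx):
    mask = input_ids.ne(padding_idx).long()                  # 1 for real tokens, 0 for pads
    incremental_indices = torch.cumsum(mask, dim=1) * mask   # running count, zeroed at pads
    return incremental_indices + padding_idx


padding_idx = 1
input_ids = torch.as_tensor([[12, 31, 13, padding_idx]])
print(create_position_ids_from_input_ids(input_ids, padding_idx))
# tensor([[2, 3, 4, 1]]) -- matches expected_positions in the test above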
@@ -12,20 +12,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import is_torch_available

from .configuration_common_test import ConfigTester
from .modeling_common_test import CommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_torch, slow


if is_torch_available():
    from transformers import T5Config, T5Model, T5WithLMHeadModel
    from transformers.modeling_t5 import T5_PRETRAINED_MODEL_ARCHIVE_MAP
@@ -39,26 +38,26 @@ class T5ModelTest(CommonTestCases.CommonModelTester):
    is_encoder_decoder = True

    class T5ModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            encoder_seq_length=7,
            decoder_seq_length=9,
            is_training=True,
            use_attention_mask=True,
            use_labels=True,
            vocab_size=99,
            n_positions=14,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            d_ff=37,
            relative_attention_num_buckets=8,
            dropout_rate=0.1,
            initializer_factor=0.002,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.encoder_seq_length = encoder_seq_length
@@ -101,60 +100,96 @@ class T5ModelTest(CommonTestCases.CommonModelTester):
                num_heads=self.num_attention_heads,
                relative_attention_num_buckets=self.relative_attention_num_buckets,
                dropout_rate=self.dropout_rate,
                initializer_factor=self.initializer_factor,
            )

            return (
                config,
                encoder_input_ids,
                decoder_input_ids,
                encoder_attention_mask,
                decoder_attention_mask,
                decoder_lm_labels,
            )

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_t5_model(
            self,
            config,
            encoder_input_ids,
            decoder_input_ids,
            encoder_attention_mask,
            decoder_attention_mask,
            decoder_lm_labels,
        ):
            model = T5Model(config=config)
            model.eval()
            decoder_output, encoder_output = model(
                encoder_input_ids=encoder_input_ids,
                decoder_input_ids=decoder_input_ids,
                encoder_attention_mask=encoder_attention_mask,
                decoder_attention_mask=decoder_attention_mask,
            )
            decoder_output, encoder_output = model(
                encoder_input_ids=encoder_input_ids, decoder_input_ids=decoder_input_ids
            )

            result = {
                "encoder_output": encoder_output,
                "decoder_output": decoder_output,
            }
            self.parent.assertListEqual(
                list(result["encoder_output"].size()), [self.batch_size, self.encoder_seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(
                list(result["decoder_output"].size()), [self.batch_size, self.decoder_seq_length, self.hidden_size]
            )

        def create_and_check_t5_with_lm_head(
            self,
            config,
            encoder_input_ids,
            decoder_input_ids,
            encoder_attention_mask,
            decoder_attention_mask,
            decoder_lm_labels,
        ):
            model = T5WithLMHeadModel(config=config)
            model.eval()
            outputs = model(
                encoder_input_ids=encoder_input_ids,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
                decoder_lm_labels=decoder_lm_labels,
            )
            loss, prediction_scores = outputs[0], outputs[1]
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.decoder_seq_length, self.vocab_size]
            )
            self.check_loss_output(result)

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                encoder_input_ids,
                decoder_input_ids,
                encoder_attention_mask,
                decoder_attention_mask,
                decoder_lm_labels,
            ) = config_and_inputs

            inputs_dict = {
                "encoder_input_ids": encoder_input_ids,
                "decoder_input_ids": decoder_input_ids,
                "decoder_attention_mask": decoder_attention_mask,
                "encoder_attention_mask": encoder_attention_mask,
            }
            return config, inputs_dict

    def setUp(self):
@@ -178,5 +213,6 @@ class T5ModelTest(CommonTestCases.CommonModelTester):
            model = T5Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)


if __name__ == "__main__":
    unittest.main()
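# Unlike the single-stack testers, the T5 tester drives the model with
# separate encoder and decoder streams through one kwargs dict. A minimal
# sketch of that dict using the tester's default sizes:
import torch

batch_size, encoder_seq_length, decoder_seq_length, vocab_size = 13, 7, 9, 99
inputs_dict = {
    "encoder_input_ids": torch.randint(0, vocab_size, (batch_size, encoder_seq_length)),
    "decoder_input_ids": torch.randint(0, vocab_size, (batch_size, decoder_seq_length)),
    "encoder_attention_mask": torch.ones(batch_size, encoder_seq_length, dtype=torch.long),
    "decoder_attention_mask": torch.ones(batch_size, decoder_seq_length, dtype=torch.long),
}
# a seq2seq model under test is then called as model(**inputs_dict)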
@@ -12,62 +12,60 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import AlbertConfig, is_tf_available

from .configuration_common_test import ConfigTester
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_tf, slow


if is_tf_available():
    from transformers.modeling_tf_albert import (
        TFAlbertModel,
        TFAlbertForMaskedLM,
        TFAlbertForSequenceClassification,
        TF_ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP,
    )


@require_tf
class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):

    all_model_classes = (
        (TFAlbertModel, TFAlbertForMaskedLM, TFAlbertForSequenceClassification) if is_tf_available() else ()
    )

    class TFAlbertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            embedding_size=16,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -93,27 +91,22 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = AlbertConfig(
@@ -127,19 +120,20 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def create_and_check_albert_model(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFAlbertModel(config=config)
            # inputs = {'input_ids': input_ids,
            #           'attention_mask': input_mask,
            #           'token_type_ids': token_type_ids}
            # sequence_output, pooled_output = model(**inputs)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            sequence_output, pooled_output = model(inputs)

            inputs = [input_ids, input_mask]
@@ -152,50 +146,52 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
                "pooled_output": pooled_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])

        def create_and_check_albert_for_masked_lm(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFAlbertForMaskedLM(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            (prediction_scores,) = model(inputs)
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
            )
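        # NOTE: "(prediction_scores,) = model(inputs)" above is equivalent to the
        # old "prediction_scores, = model(inputs)"; the parentheses only make the
        # one-element tuple unpacking explicit, e.g.:
        #     (value,) = (42,)
        #     assert value == 42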
        def create_and_check_albert_for_sequence_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = TFAlbertForSequenceClassification(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            (logits,) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
            return config, inputs_dict

    def setUp(self):
        self.model_tester = TFAlbertModelTest.TFAlbertModelTester(self)
        self.config_tester = ConfigTester(self, config_class=AlbertConfig, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()
@@ -206,13 +202,11 @@ class TFAlbertModelTest(TFCommonTestCases.TFCommonModelTester):
    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_for_masked_lm(*config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_albert_for_sequence_classification(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
...
@@ -12,28 +12,29 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import logging
import unittest

from transformers import is_tf_available

from .utils import SMALL_MODEL_IDENTIFIER, require_tf, slow


if is_tf_available():
    from transformers import (
        AutoConfig,
        BertConfig,
        TFAutoModel,
        TFBertModel,
        TFAutoModelWithLMHead,
        TFBertForMaskedLM,
        TFAutoModelForSequenceClassification,
        TFBertForSequenceClassification,
        TFAutoModelForQuestionAnswering,
        TFBertForQuestionAnswering,
    )


@require_tf
@@ -41,11 +42,12 @@ class TFAutoModelTest(unittest.TestCase):
    @slow
    def test_model_from_pretrained(self):
        import h5py

        self.assertTrue(h5py.version.hdf5_version.startswith("1.10"))

        logging.basicConfig(level=logging.INFO)
        # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        for model_name in ["bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
@@ -58,7 +60,7 @@ class TFAutoModelTest(unittest.TestCase):
    def test_lmhead_model_from_pretrained(self):
        logging.basicConfig(level=logging.INFO)
        # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        for model_name in ["bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
@@ -71,7 +73,7 @@ class TFAutoModelTest(unittest.TestCase):
    def test_sequence_classification_model_from_pretrained(self):
        logging.basicConfig(level=logging.INFO)
        # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        for model_name in ["bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
@@ -84,7 +86,7 @@ class TFAutoModelTest(unittest.TestCase):
    def test_question_answering_model_from_pretrained(self):
        logging.basicConfig(level=logging.INFO)
        # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        for model_name in ["bert-base-uncased"]:
            config = AutoConfig.from_pretrained(model_name)
            self.assertIsNotNone(config)
            self.assertIsInstance(config, BertConfig)
...
@@ -12,64 +12,74 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import BertConfig, is_tf_available

from .configuration_common_test import ConfigTester
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_tf, slow


if is_tf_available():
    import tensorflow as tf

    from transformers.modeling_tf_bert import (
        TFBertModel,
        TFBertForMaskedLM,
        TFBertForNextSentencePrediction,
        TFBertForPreTraining,
        TFBertForSequenceClassification,
        TFBertForMultipleChoice,
        TFBertForTokenClassification,
        TFBertForQuestionAnswering,
    )


@require_tf
class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):

    all_model_classes = (
        (
            TFBertModel,
            TFBertForMaskedLM,
            TFBertForNextSentencePrediction,
            TFBertForPreTraining,
            TFBertForQuestionAnswering,
            TFBertForSequenceClassification,
            TFBertForTokenClassification,
        )
        if is_tf_available()
        else ()
    )

    class TFBertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -123,15 +133,16 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def create_and_check_bert_model(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFBertModel(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            sequence_output, pooled_output = model(inputs)

            inputs = [input_ids, input_mask]
@@ -144,128 +155,119 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
                "pooled_output": pooled_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])

        def create_and_check_bert_for_masked_lm(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFBertForMaskedLM(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            (prediction_scores,) = model(inputs)
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
            )

        def create_and_check_bert_for_next_sequence_prediction(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFBertForNextSentencePrediction(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            (seq_relationship_score,) = model(inputs)
            result = {
                "seq_relationship_score": seq_relationship_score.numpy(),
            }
            self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])

        def create_and_check_bert_for_pretraining(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFBertForPreTraining(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            prediction_scores, seq_relationship_score = model(inputs)
            result = {
                "prediction_scores": prediction_scores.numpy(),
                "seq_relationship_score": seq_relationship_score.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
            )
            self.parent.assertListEqual(list(result["seq_relationship_score"].shape), [self.batch_size, 2])

        def create_and_check_bert_for_sequence_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = TFBertForSequenceClassification(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            (logits,) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])

        def create_and_check_bert_for_multiple_choice(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_choices = self.num_choices
            model = TFBertForMultipleChoice(config=config)
            multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
            multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
            multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
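            # Shape walk-through of the tiling above, for the default tester sizes:
            # (batch_size, seq_length) --expand_dims(axis=1)--> (batch_size, 1, seq_length)
            # --tile((1, num_choices, 1))--> (batch_size, num_choices, seq_length)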
            inputs = {
                "input_ids": multiple_choice_inputs_ids,
                "attention_mask": multiple_choice_input_mask,
                "token_type_ids": multiple_choice_token_type_ids,
            }
            (logits,) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_choices])
        def create_and_check_bert_for_token_classification(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            config.num_labels = self.num_labels
            model = TFBertForTokenClassification(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            (logits,) = model(inputs)
            result = {
                "logits": logits.numpy(),
            }
            self.parent.assertListEqual(
                list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
            )

        def create_and_check_bert_for_question_answering(
            self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFBertForQuestionAnswering(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            start_logits, end_logits = model(inputs)
            result = {
                "start_logits": start_logits.numpy(),
                "end_logits": end_logits.numpy(),
            }
            self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_mask,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs
            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
            return config, inputs_dict

    def setUp(self):
@@ -310,10 +312,10 @@ class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
    @slow
    def test_model_from_pretrained(self):
        # for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
        for model_name in ["bert-base-uncased"]:
            model = TFBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)


if __name__ == "__main__":
    unittest.main()
@@ -14,53 +14,52 @@
# limitations under the License.
from __future__ import absolute_import, division, print_function

import copy
import os
import random
import shutil
import sys
import tempfile
import unittest

from transformers import is_tf_available, is_torch_available

from .utils import require_tf
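# (Imports above are grouped in the usual convention: standard library first,
# then third-party packages, then local test modules, each group alphabetized.)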
if is_tf_available():
    import tensorflow as tf
    import numpy as np

    from transformers import TFPreTrainedModel

    # from transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP


if sys.version_info[0] == 2:

    class TemporaryDirectory(object):
        """Context manager for tempfile.mkdtemp() so it's usable with "with" statement."""

        def __enter__(self):
            self.name = tempfile.mkdtemp()
            return self.name

        def __exit__(self, exc_type, exc_value, traceback):
            shutil.rmtree(self.name)


else:
    TemporaryDirectory = tempfile.TemporaryDirectory
    unicode = str
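# Either way, callers get the same API, as used further down in this file:
#     with TemporaryDirectory() as tmpdirname:
#         checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
# and the directory is removed when the block exits.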
def _config_zero_init(config):
    configs_no_init = copy.deepcopy(config)
    for key in configs_no_init.__dict__.keys():
        if "_range" in key or "_std" in key:
            setattr(configs_no_init, key, 0.0)
    return configs_no_init
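# For example (a sketch; assumes a config exposing an `initializer_range` field):
#     zeroed = _config_zero_init(BertConfig(initializer_range=0.02))
#     assert zeroed.initializer_range == 0.0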
class TFCommonTestCases:
    @require_tf
    class TFCommonModelTester(unittest.TestCase):
@@ -126,8 +125,9 @@ class TFCommonTestCases:
            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
            )
            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(inputs_dict, training=False)
@@ -140,18 +140,19 @@ class TFCommonTestCases:
            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)

            # Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
            )
            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(inputs_dict)
@@ -166,13 +167,19 @@ class TFCommonTestCases:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

            if self.is_encoder_decoder:
                input_ids = {
                    "decoder_input_ids": tf.keras.Input(
                        batch_shape=(2, 2000), name="decoder_input_ids", dtype="int32"
                    ),
                    "encoder_input_ids": tf.keras.Input(
                        batch_shape=(2, 2000), name="encoder_input_ids", dtype="int32"
                    ),
                }
            else:
                input_ids = tf.keras.Input(batch_shape=(2, 2000), name="input_ids", dtype="int32")
            optimizer = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08, clipnorm=1.0)
            loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
            metric = tf.keras.metrics.SparseCategoricalAccuracy("accuracy")

            for model_class in self.all_model_classes:
                # Prepare our model
@@ -188,7 +195,7 @@ class TFCommonTestCases:
                hidden_states = outputs_dict[0]

                # Add a dense layer on top to test integration with other keras modules
                outputs = tf.keras.layers.Dense(2, activation="softmax", name="outputs")(hidden_states)

                # Compile extended model
                extended_model = tf.keras.Model(inputs=[input_ids], outputs=[outputs])
@@ -202,7 +209,9 @@ class TFCommonTestCases:
                outputs_dict = model(inputs_dict)

                inputs_keywords = copy.deepcopy(inputs_dict)
                input_ids = inputs_keywords.pop(
                    "input_ids" if not self.is_encoder_decoder else "decoder_input_ids", None
                )
                outputs_keywords = model(input_ids, **inputs_keywords)

                output_dict = outputs_dict[0].numpy()
@@ -213,10 +222,22 @@ class TFCommonTestCases:
        def test_attention_outputs(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

            decoder_seq_length = (
                self.model_tester.decoder_seq_length
                if hasattr(self.model_tester, "decoder_seq_length")
                else self.model_tester.seq_length
            )
            encoder_seq_length = (
                self.model_tester.encoder_seq_length
                if hasattr(self.model_tester, "encoder_seq_length")
                else self.model_tester.seq_length
            )
            decoder_key_length = (
                self.model_tester.key_length if hasattr(self.model_tester, "key_length") else decoder_seq_length
            )
            encoder_key_length = (
                self.model_tester.key_length if hasattr(self.model_tester, "key_length") else encoder_seq_length
            )
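            # Each fallback above is just getattr() with a default, e.g.:
            #     decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", self.model_tester.seq_length)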
            for model_class in self.all_model_classes:
                config.output_attentions = True
@@ -229,22 +250,20 @@ class TFCommonTestCases:
                self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
                )
                out_len = len(outputs)

                if self.is_encoder_decoder:
                    self.assertEqual(out_len % 2, 0)
                    decoder_attentions = outputs[(out_len // 2) - 1]
                    self.assertEqual(model.config.output_attentions, True)
                    self.assertEqual(model.config.output_hidden_states, False)
                    self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
                    self.assertListEqual(
                        list(decoder_attentions[0].shape[-3:]),
                        [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
                    )

                # Check attention is always last and order is fine
                config.output_attentions = True
@@ -259,9 +278,8 @@ class TFCommonTestCases:
                self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
                )

        def test_hidden_states_output(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -276,8 +294,8 @@ class TFCommonTestCases:
                self.assertEqual(model.config.output_hidden_states, True)
                self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
                self.assertListEqual(
                    list(hidden_states[0].shape[-2:]), [self.model_tester.seq_length, self.model_tester.hidden_size]
                )

        def test_model_common_attributes(self):
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -307,13 +325,13 @@ class TFCommonTestCases:
            # We used to fall back to just synthetically creating a dummy tensor of ones:
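            # NOTE: the "except Exception:" clauses below replace the earlier bare
            # "except:" clauses, which would also have caught SystemExit and
            # KeyboardInterrupt.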
            try:
                x = wte(input_ids, mode="embedding")
            except Exception:
                try:
                    x = wte([input_ids], mode="embedding")
                except Exception:
                    try:
                        x = wte([input_ids, None, None, None], mode="embedding")
                    except Exception:
                        if hasattr(self.model_tester, "embedding_size"):
                            x = tf.ones(input_ids.shape + [self.model_tester.embedding_size], dtype=tf.dtypes.float32)
                        else:
@@ -357,9 +375,7 @@ def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):
    for _ in range(total_dims):
        values.append(rng.randint(0, vocab_size - 1))

    output = tf.constant(values, shape=shape, dtype=dtype if dtype is not None else tf.int32)

    return output
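# For example, ids_tensor([13, 7], vocab_size=99) yields an int32 tensor of
# shape (13, 7) with values drawn uniformly from [0, 99).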
...
@@ -12,23 +12,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import CTRLConfig, is_tf_available

from .configuration_common_test import ConfigTester
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_tf, slow


if is_tf_available():
    from transformers.modeling_tf_ctrl import TFCTRLModel, TFCTRLLMHeadModel, TF_CTRL_PRETRAINED_MODEL_ARCHIVE_MAP


@require_tf
@@ -37,32 +33,32 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
    all_model_classes = (TFCTRLModel, TFCTRLLMHeadModel) if is_tf_available() else ()

    class TFCTRLModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
            use_input_mask=True,
            use_labels=True,
            use_mc_token_ids=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -127,13 +123,21 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
            head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

            return (
                config,
                input_ids,
                input_mask,
                head_mask,
                token_type_ids,
                mc_token_ids,
                sequence_labels,
                token_labels,
                choice_labels,
            )

        def create_and_check_ctrl_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
            model = TFCTRLModel(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            sequence_output = model(inputs)[0]

            inputs = [input_ids, None, input_mask]  # None is the input for 'past'
@@ -145,30 +149,36 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
                "sequence_output": sequence_output.numpy(),
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
            )

        def create_and_check_ctrl_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
            model = TFCTRLLMHeadModel(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
            prediction_scores = model(inputs)[0]
            result = {
                "prediction_scores": prediction_scores.numpy(),
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
            )

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                input_mask,
                head_mask,
                token_type_ids,
                mc_token_ids,
                sequence_labels,
                token_labels,
                choice_labels,
            ) = config_and_inputs

            inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
            return config, inputs_dict

    def setUp(self):
@@ -192,6 +202,6 @@ class TFCTRLModelTest(TFCommonTestCases.TFCommonModelTester):
            model = TFCTRLModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)


if __name__ == "__main__":
    unittest.main()
@@ -12,62 +12,70 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import unittest

from transformers import DistilBertConfig, is_tf_available

from .configuration_common_test import ConfigTester
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .utils import require_tf


if is_tf_available():
    from transformers.modeling_tf_distilbert import (
        TFDistilBertModel,
        TFDistilBertForMaskedLM,
        TFDistilBertForQuestionAnswering,
        TFDistilBertForSequenceClassification,
    )


@require_tf
class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):

    all_model_classes = (
        (
            TFDistilBertModel,
            TFDistilBertForMaskedLM,
            TFDistilBertForQuestionAnswering,
            TFDistilBertForSequenceClassification,
        )
        if is_tf_available()
        else None
    )
    test_pruning = True
    test_torchscript = True
    test_resize_embeddings = True
    test_head_masking = True

    class TFDistilBertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=False,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
@@ -116,14 +124,16 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
                dropout=self.hidden_dropout_prob,
                attention_dropout=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                initializer_range=self.initializer_range,
            )

            return config, input_ids, input_mask, sequence_labels, token_labels, choice_labels

        def create_and_check_distilbert_model(
            self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
        ):
            model = TFDistilBertModel(config=config)
            inputs = {"input_ids": input_ids, "attention_mask": input_mask}

            outputs = model(inputs)
            sequence_output = outputs[0]
...@@ -136,54 +146,51 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -136,54 +146,51 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
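The dict-based call above relies on TF 2.0 models accepting their inputs as a dict keyed by argument name. A self-contained sketch of the same shape check with a deliberately tiny config; the sizes mirror the tester defaults, and the DistilBertConfig keyword names are an assumption, not taken from this diff:

import tensorflow as tf
from transformers import DistilBertConfig, is_tf_available

if is_tf_available():
    from transformers.modeling_tf_distilbert import TFDistilBertModel

    # Tiny config so the sketch runs quickly (assumed keyword names).
    config = DistilBertConfig(vocab_size=99, dim=32, n_layers=5, n_heads=4, hidden_dim=37)
    model = TFDistilBertModel(config)
    input_ids = tf.random.uniform([13, 7], maxval=99, dtype=tf.int32)
    attention_mask = tf.ones_like(input_ids)
    sequence_output = model({"input_ids": input_ids, "attention_mask": attention_mask})[0]
    assert sequence_output.shape == (13, 7, 32)  # (batch_size, seq_length, hidden_size)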
def create_and_check_distilbert_for_masked_lm(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_masked_lm(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFDistilBertForMaskedLM(config=config) model = TFDistilBertForMaskedLM(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask}
'attention_mask': input_mask}
(prediction_scores,) = model(inputs) (prediction_scores,) = model(inputs)
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_distilbert_for_question_answering(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_question_answering(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFDistilBertForQuestionAnswering(config=config) model = TFDistilBertForQuestionAnswering(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask}
'attention_mask': input_mask}
start_logits, end_logits = model(inputs) start_logits, end_logits = model(inputs)
result = { result = {
"start_logits": start_logits.numpy(), "start_logits": start_logits.numpy(),
"end_logits": end_logits.numpy(), "end_logits": end_logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["start_logits"].shape), [self.batch_size, self.seq_length])
list(result["start_logits"].shape), self.parent.assertListEqual(list(result["end_logits"].shape), [self.batch_size, self.seq_length])
[self.batch_size, self.seq_length])
self.parent.assertListEqual(
list(result["end_logits"].shape),
[self.batch_size, self.seq_length])
def create_and_check_distilbert_for_sequence_classification(self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_distilbert_for_sequence_classification(
self, config, input_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = TFDistilBertForSequenceClassification(config) model = TFDistilBertForSequenceClassification(config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask}
'attention_mask': input_mask}
(logits,) = model(inputs) (logits,) = model(inputs)
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(list(result["logits"].shape), [self.batch_size, self.num_labels])
list(result["logits"].shape),
[self.batch_size, self.num_labels])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs (config, input_ids, input_mask, sequence_labels, token_labels, choice_labels) = config_and_inputs
inputs_dict = {'input_ids': input_ids, 'attention_mask': input_mask} inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -215,5 +222,6 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -215,5 +222,6 @@ class TFDistilBertModelTest(TFCommonTestCases.TFCommonModelTester):
# model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR) # model = DistilBertModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
# self.assertIsNotNone(model) # self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
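Stepping back, every file in this diff follows the same two-layer pattern: a plain tester object builds a small random config plus inputs, and the unittest class drives one create_and_check_* method per model head. A condensed, hypothetical sketch of that control flow:

import unittest

class _SketchTester(object):
    # Hypothetical, condensed version of the tester objects above.
    def __init__(self, parent):
        self.parent = parent
        self.vocab_size = 99

    def prepare_config_and_inputs(self):
        config = {"vocab_size": self.vocab_size}  # stands in for a real *Config
        input_ids = [[1, 2, 3]]                   # stands in for ids_tensor(...)
        return config, input_ids

    def create_and_check_model(self, config, input_ids):
        # A real tester builds the model here and asserts output shapes.
        self.parent.assertEqual(config["vocab_size"], self.vocab_size)

class SketchModelTest(unittest.TestCase):
    def test_model(self):
        tester = _SketchTester(self)
        tester.create_and_check_model(*tester.prepare_config_and_inputs())

if __name__ == "__main__":
    unittest.main()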
...@@ -12,60 +12,60 @@ ...@@ -12,60 +12,60 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import, division, print_function
from __future__ import division
from __future__ import print_function
import unittest import unittest
import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from transformers import GPT2Config, is_tf_available
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import GPT2Config, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_gpt2 import (TFGPT2Model, TFGPT2LMHeadModel, from transformers.modeling_tf_gpt2 import (
TFGPT2DoubleHeadsModel, TFGPT2Model,
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP) TFGPT2LMHeadModel,
TFGPT2DoubleHeadsModel,
TF_GPT2_PRETRAINED_MODEL_ARCHIVE_MAP,
)
@require_tf @require_tf
class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2DoubleHeadsModel) if is_tf_available() else ()
TFGPT2DoubleHeadsModel) if is_tf_available() else ()
# all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel) if is_tf_available() else () # all_model_classes = (TFGPT2Model, TFGPT2LMHeadModel) if is_tf_available() else ()
class TFGPT2ModelTester(object): class TFGPT2ModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_token_type_ids=True, use_token_type_ids=True,
use_input_mask=True, use_input_mask=True,
use_labels=True, use_labels=True,
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -130,13 +130,21 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -130,13 +130,21 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels return (
config,
input_ids,
input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
)
def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFGPT2Model(config=config) model = TFGPT2Model(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output = model(inputs)[0] sequence_output = model(inputs)[0]
inputs = [input_ids, None, input_mask] # None is the input for 'past' inputs = [input_ids, None, input_mask] # None is the input for 'past'
...@@ -148,54 +156,58 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -148,54 +156,58 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_gpt2_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_gpt2_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFGPT2LMHeadModel(config=config) model = TFGPT2LMHeadModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
prediction_scores = model(inputs)[0] prediction_scores = model(inputs)[0]
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_gpt2_double_head(self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args): def create_and_check_gpt2_double_head(
self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
):
model = TFGPT2DoubleHeadsModel(config=config) model = TFGPT2DoubleHeadsModel(config=config)
multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
inputs = {'input_ids': multiple_choice_inputs_ids, inputs = {
'mc_token_ids': mc_token_ids, "input_ids": multiple_choice_inputs_ids,
'attention_mask': multiple_choice_input_mask, "mc_token_ids": mc_token_ids,
'token_type_ids': multiple_choice_token_type_ids} "attention_mask": multiple_choice_input_mask,
lm_logits, mc_logits = model(inputs)[:2] "token_type_ids": multiple_choice_token_type_ids,
result = {
"lm_logits": lm_logits.numpy(),
"mc_logits": mc_logits.numpy()
} }
lm_logits, mc_logits = model(inputs)[:2]
result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()}
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["lm_logits"].shape), list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size]
[self.batch_size, self.num_choices, self.seq_length, self.vocab_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices])
list(result["mc_logits"].shape),
[self.batch_size, self.num_choices])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, head_mask, token_type_ids, (
mc_token_ids, sequence_labels, token_labels, choice_labels) = config_and_inputs config,
input_ids,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask} input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -223,6 +235,6 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -223,6 +235,6 @@ class TFGPT2ModelTest(TFCommonTestCases.TFCommonModelTester):
model = TFGPT2Model.from_pretrained(model_name, cache_dir=CACHE_DIR) model = TFGPT2Model.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
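The double-heads check above turns per-example tensors into per-choice tensors with expand_dims plus tile; the same reshaping in isolation, with sizes taken from the tester defaults:

import tensorflow as tf

batch_size, num_choices, seq_length = 13, 4, 7
input_ids = tf.random.uniform([batch_size, seq_length], maxval=99, dtype=tf.int32)

# (batch, seq) -> (batch, 1, seq) -> (batch, num_choices, seq):
# insert a choices axis, then repeat the sequence once per choice.
multiple_choice_input_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, num_choices, 1))
assert multiple_choice_input_ids.shape == (batch_size, num_choices, seq_length)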
...@@ -12,59 +12,61 @@ ...@@ -12,59 +12,61 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import, division, print_function
from __future__ import division
from __future__ import print_function
import unittest import unittest
import sys
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from transformers import OpenAIGPTConfig, is_tf_available
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import OpenAIGPTConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
from transformers.modeling_tf_openai import (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, from transformers.modeling_tf_openai import (
TFOpenAIGPTDoubleHeadsModel, TFOpenAIGPTModel,
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP) TFOpenAIGPTLMHeadModel,
TFOpenAIGPTDoubleHeadsModel,
TF_OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP,
)
@require_tf @require_tf
class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, all_model_classes = (
TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else () (TFOpenAIGPTModel, TFOpenAIGPTLMHeadModel, TFOpenAIGPTDoubleHeadsModel) if is_tf_available() else ()
)
class TFOpenAIGPTModelTester(object): class TFOpenAIGPTModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_token_type_ids=True, use_token_type_ids=True,
use_input_mask=True, use_input_mask=True,
use_labels=True, use_labels=True,
use_mc_token_ids=True, use_mc_token_ids=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -129,13 +131,21 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -129,13 +131,21 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2) head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)
return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels return (
config,
input_ids,
input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
)
def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_openai_gpt_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFOpenAIGPTModel(config=config) model = TFOpenAIGPTModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output = model(inputs)[0] sequence_output = model(inputs)[0]
inputs = [input_ids, input_mask] inputs = [input_ids, input_mask]
...@@ -147,54 +157,58 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -147,54 +157,58 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args): def create_and_check_openai_gpt_lm_head(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
model = TFOpenAIGPTLMHeadModel(config=config) model = TFOpenAIGPTLMHeadModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
prediction_scores = model(inputs)[0] prediction_scores = model(inputs)[0]
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_openai_gpt_double_head(self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args): def create_and_check_openai_gpt_double_head(
self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
):
model = TFOpenAIGPTDoubleHeadsModel(config=config) model = TFOpenAIGPTDoubleHeadsModel(config=config)
multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1)) multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1)) multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1)) multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
inputs = {'input_ids': multiple_choice_inputs_ids, inputs = {
'mc_token_ids': mc_token_ids, "input_ids": multiple_choice_inputs_ids,
'attention_mask': multiple_choice_input_mask, "mc_token_ids": mc_token_ids,
'token_type_ids': multiple_choice_token_type_ids} "attention_mask": multiple_choice_input_mask,
lm_logits, mc_logits = model(inputs)[:2] "token_type_ids": multiple_choice_token_type_ids,
result = {
"lm_logits": lm_logits.numpy(),
"mc_logits": mc_logits.numpy()
} }
lm_logits, mc_logits = model(inputs)[:2]
result = {"lm_logits": lm_logits.numpy(), "mc_logits": mc_logits.numpy()}
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["lm_logits"].shape), list(result["lm_logits"].shape), [self.batch_size, self.num_choices, self.seq_length, self.vocab_size]
[self.batch_size, self.num_choices, self.seq_length, self.vocab_size]) )
self.parent.assertListEqual( self.parent.assertListEqual(list(result["mc_logits"].shape), [self.batch_size, self.num_choices])
list(result["mc_logits"].shape),
[self.batch_size, self.num_choices])
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, input_mask, head_mask, token_type_ids, (
mc_token_ids, sequence_labels, token_labels, choice_labels) = config_and_inputs config,
input_ids,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask} input_mask,
head_mask,
token_type_ids,
mc_token_ids,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -222,6 +236,6 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -222,6 +236,6 @@ class TFOpenAIGPTModelTest(TFCommonTestCases.TFCommonModelTester):
model = TFOpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR) model = TFOpenAIGPTModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
self.assertIsNotNone(model) self.assertIsNotNone(model)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
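One detail shared by all four files is worth spelling out: TF-only imports sit behind is_tf_available(), and all_model_classes degrades to an empty tuple, so the module still imports cleanly on a torch-only install while @require_tf skips the tests. A minimal sketch of the same guard, using tf.keras.Model as a stand-in model class:

from transformers import is_tf_available

if is_tf_available():
    import tensorflow as tf  # imported only when the backend is present

# The false branch of a conditional expression is never evaluated,
# so `tf` need not be defined when TensorFlow is missing.
ALL_MODEL_CLASSES = (tf.keras.Model,) if is_tf_available() else ()

for model_class in ALL_MODEL_CLASSES:
    print(model_class.__name__)  # without TF: an empty loop, not an ImportError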
...@@ -12,59 +12,62 @@ ...@@ -12,59 +12,62 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import, division, print_function
from __future__ import division
from __future__ import print_function
import unittest import unittest
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor) from transformers import RobertaConfig, is_tf_available
from .configuration_common_test import ConfigTester from .configuration_common_test import ConfigTester
from .modeling_tf_common_test import TFCommonTestCases, ids_tensor
from .utils import CACHE_DIR, require_tf, slow from .utils import CACHE_DIR, require_tf, slow
from transformers import RobertaConfig, is_tf_available
if is_tf_available(): if is_tf_available():
import tensorflow as tf import tensorflow as tf
import numpy import numpy
from transformers.modeling_tf_roberta import (TFRobertaModel, TFRobertaForMaskedLM, from transformers.modeling_tf_roberta import (
TFRobertaForSequenceClassification, TFRobertaModel,
TFRobertaForTokenClassification, TFRobertaForMaskedLM,
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP) TFRobertaForSequenceClassification,
TFRobertaForTokenClassification,
TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP,
)
@require_tf @require_tf
class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFRobertaModel,TFRobertaForMaskedLM, all_model_classes = (
TFRobertaForSequenceClassification) if is_tf_available() else () (TFRobertaModel, TFRobertaForMaskedLM, TFRobertaForSequenceClassification) if is_tf_available() else ()
)
class TFRobertaModelTester(object): class TFRobertaModelTester(object):
def __init__(
def __init__(self, self,
parent, parent,
batch_size=13, batch_size=13,
seq_length=7, seq_length=7,
is_training=True, is_training=True,
use_input_mask=True, use_input_mask=True,
use_token_type_ids=True, use_token_type_ids=True,
use_labels=True, use_labels=True,
vocab_size=99, vocab_size=99,
hidden_size=32, hidden_size=32,
num_hidden_layers=5, num_hidden_layers=5,
num_attention_heads=4, num_attention_heads=4,
intermediate_size=37, intermediate_size=37,
hidden_act="gelu", hidden_act="gelu",
hidden_dropout_prob=0.1, hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1, attention_probs_dropout_prob=0.1,
max_position_embeddings=512, max_position_embeddings=512,
type_vocab_size=16, type_vocab_size=16,
type_sequence_label_size=2, type_sequence_label_size=2,
initializer_range=0.02, initializer_range=0.02,
num_labels=3, num_labels=3,
num_choices=4, num_choices=4,
scope=None, scope=None,
): ):
self.parent = parent self.parent = parent
self.batch_size = batch_size self.batch_size = batch_size
self.seq_length = seq_length self.seq_length = seq_length
...@@ -118,16 +121,16 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -118,16 +121,16 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
attention_probs_dropout_prob=self.attention_probs_dropout_prob, attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings, max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size, type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range) initializer_range=self.initializer_range,
)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def create_and_check_roberta_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, def create_and_check_roberta_model(
token_labels, choice_labels): self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFRobertaModel(config=config) model = TFRobertaModel(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output = model(inputs)[0] sequence_output = model(inputs)[0]
inputs = [input_ids, input_mask] inputs = [input_ids, input_mask]
...@@ -139,39 +142,47 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -139,39 +142,47 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
"sequence_output": sequence_output.numpy(), "sequence_output": sequence_output.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["sequence_output"].shape), list(result["sequence_output"].shape), [self.batch_size, self.seq_length, self.hidden_size]
[self.batch_size, self.seq_length, self.hidden_size]) )
def create_and_check_roberta_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, def create_and_check_roberta_for_masked_lm(
token_labels, choice_labels): self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
model = TFRobertaForMaskedLM(config=config) model = TFRobertaForMaskedLM(config=config)
prediction_scores = model([input_ids, input_mask, token_type_ids])[0] prediction_scores = model([input_ids, input_mask, token_type_ids])[0]
result = { result = {
"prediction_scores": prediction_scores.numpy(), "prediction_scores": prediction_scores.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["prediction_scores"].shape), list(result["prediction_scores"].shape), [self.batch_size, self.seq_length, self.vocab_size]
[self.batch_size, self.seq_length, self.vocab_size]) )
def create_and_check_roberta_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels): def create_and_check_roberta_for_token_classification(
self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
config.num_labels = self.num_labels config.num_labels = self.num_labels
model = TFRobertaForTokenClassification(config=config) model = TFRobertaForTokenClassification(config=config)
inputs = {'input_ids': input_ids, inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
'attention_mask': input_mask, (logits,) = model(inputs)
'token_type_ids': token_type_ids}
logits, = model(inputs)
result = { result = {
"logits": logits.numpy(), "logits": logits.numpy(),
} }
self.parent.assertListEqual( self.parent.assertListEqual(
list(result["logits"].shape), list(result["logits"].shape), [self.batch_size, self.seq_length, self.num_labels]
[self.batch_size, self.seq_length, self.num_labels]) )
def prepare_config_and_inputs_for_common(self): def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs() config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_mask, (
sequence_labels, token_labels, choice_labels) = config_and_inputs config,
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask} input_ids,
token_type_ids,
input_mask,
sequence_labels,
token_labels,
choice_labels,
) = config_and_inputs
inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
return config, inputs_dict return config, inputs_dict
def setUp(self): def setUp(self):
...@@ -196,61 +207,43 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester): ...@@ -196,61 +207,43 @@ class TFRobertaModelTest(TFCommonTestCases.TFCommonModelTester):
self.assertIsNotNone(model) self.assertIsNotNone(model)
class TFRobertaModelIntegrationTest(unittest.TestCase): class TFRobertaModelIntegrationTest(unittest.TestCase):
@slow @slow
def test_inference_masked_lm(self): def test_inference_masked_lm(self):
model = TFRobertaForMaskedLM.from_pretrained('roberta-base') model = TFRobertaForMaskedLM.from_pretrained("roberta-base")
input_ids = tf.constant([[ 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0] output = model(input_ids)[0]
expected_shape = [1, 11, 50265] expected_shape = [1, 11, 50265]
self.assertEqual( self.assertEqual(list(output.numpy().shape), expected_shape)
list(output.numpy().shape),
expected_shape
)
# compare the actual values for a slice. # compare the actual values for a slice.
expected_slice = tf.constant( expected_slice = tf.constant(
[[[33.8843, -4.3107, 22.7779], [[[33.8843, -4.3107, 22.7779], [4.6533, -2.8099, 13.6252], [1.8222, -3.6898, 8.8600]]]
[ 4.6533, -2.8099, 13.6252],
[ 1.8222, -3.6898, 8.8600]]]
)
self.assertTrue(
numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3)
) )
self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3))
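The integration tests above pin down only a 3x3 corner of the output and compare it with an absolute tolerance rather than exact equality; the same assertion style in isolation:

import numpy

# Pretend `output` is a (1, seq, hidden) activation slice from a model.
output = numpy.array([[[33.8843, -4.3107, 22.7779],
                       [4.6533, -2.8099, 13.6252],
                       [1.8222, -3.6898, 8.8600]]])
expected_slice = output + 1e-4  # off by less than the 1e-3 tolerance
assert numpy.allclose(output[:, :3, :3], expected_slice, atol=1e-3)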
@slow @slow
def test_inference_no_head(self): def test_inference_no_head(self):
model = TFRobertaModel.from_pretrained('roberta-base') model = TFRobertaModel.from_pretrained("roberta-base")
input_ids = tf.constant([[ 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0] output = model(input_ids)[0]
# compare the actual values for a slice. # compare the actual values for a slice.
expected_slice = tf.constant( expected_slice = tf.constant(
[[[-0.0231, 0.0782, 0.0074], [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0539, -0.0174], [0.0548, 0.0799, 0.1687]]]
[-0.1854, 0.0539, -0.0174],
[ 0.0548, 0.0799, 0.1687]]]
)
self.assertTrue(
numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3)
) )
self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3))
@slow @slow
def test_inference_classification_head(self): def test_inference_classification_head(self):
model = TFRobertaForSequenceClassification.from_pretrained('roberta-large-mnli') model = TFRobertaForSequenceClassification.from_pretrained("roberta-large-mnli")
input_ids = tf.constant([[ 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]]) input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
output = model(input_ids)[0] output = model(input_ids)[0]
expected_shape = [1, 3] expected_shape = [1, 3]
self.assertEqual( self.assertEqual(list(output.numpy().shape), expected_shape)
list(output.numpy().shape), expected_tensor = tf.constant([[-0.9469, 0.3913, 0.5118]])
expected_shape self.assertTrue(numpy.allclose(output.numpy(), expected_tensor.numpy(), atol=1e-3))
)
expected_tensor = tf.constant([[-0.9469, 0.3913, 0.5118]])
self.assertTrue(
numpy.allclose(output.numpy(), expected_tensor.numpy(), atol=1e-3)
)
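All three integration tests carry the slow marker imported from .utils, which, as far as this diff shows, gates expensive pretrained-weight downloads behind an opt-in. A sketch of how such a decorator is typically built; the RUN_SLOW variable name is an assumption, not taken from this diff:

import os
import unittest

def slow(test_case):
    # Skip unless the environment explicitly opts in to slow tests.
    run_slow = os.environ.get("RUN_SLOW", "0").lower() in ("1", "true", "yes")
    return unittest.skipUnless(run_slow, "test is slow")(test_case)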
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()