Commit 11fae9e6 authored by thomwolf

add tf bert files

parent d77abd4d
@@ -79,6 +79,9 @@ def url_to_filename(url, etag=None):
Convert `url` into a hashed filename in a repeatable way.
If `etag` is specified, append its hash to the url's, delimited
by a period.
If the url ends with .h5 (Keras HDF5 weights), adds '.h5' to the name
so that TF 2.0 can identify it as an HDF5 file
(see https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1380)
"""
url_bytes = url.encode('utf-8')
url_hash = sha256(url_bytes)
@@ -89,6 +92,9 @@ def url_to_filename(url, etag=None):
etag_hash = sha256(etag_bytes)
filename += '.' + etag_hash.hexdigest()
if url.endswith('.h5'):
filename += '.h5'
return filename
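For illustration only, here is a minimal standalone sketch of the cached-filename scheme this hunk describes (the URL and ETag values are hypothetical; the real helper is url_to_filename in the file_utils module shown in this diff):

from hashlib import sha256

def sketch_url_to_filename(url, etag=None):
    # hash the URL, optionally append the ETag hash, and keep a '.h5' suffix
    # so that tf.keras.Model.load_weights recognizes the cached file as HDF5
    filename = sha256(url.encode('utf-8')).hexdigest()
    if etag is not None:
        filename += '.' + sha256(etag.encode('utf-8')).hexdigest()
    if url.endswith('.h5'):
        filename += '.h5'
    return filename

# e.g. sketch_url_to_filename('https://example.com/tf_model.h5', etag='"abc"')
# yields '<sha256-of-url>.<sha256-of-etag>.h5', so the cache entry is detected as Keras HDF5 weights.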
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TF general model utils."""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import logging
import os
import tensorflow as tf
from .configuration_utils import PretrainedConfig
from .file_utils import cached_path, WEIGHTS_NAME, TF_WEIGHTS_NAME
logger = logging.getLogger(__name__)
class TFPreTrainedModel(tf.keras.Model):
r""" Base class for all TF models.
:class:`~pytorch_transformers.TFPreTrainedModel` takes care of storing the configuration of the models and handles methods for loading/downloading/saving models
as well as a few methods common to all models for (i) resizing the input embeddings and (ii) pruning heads in the self-attention layers.
Class attributes (overridden by derived classes):
- ``config_class``: a class derived from :class:`~pytorch_transformers.PretrainedConfig` to use as configuration class for this model architecture.
- ``pretrained_model_archive_map``: a python ``dict`` of with `short-cut-names` (string) as keys and `url` (string) of associated pretrained weights as values.
- ``load_pt_weights``: a python ``method`` for loading a PyTorch checkpoint in a TF 2.0 model, taking as arguments:
- ``model``: an instance of the relevant subclass of :class:`~pytorch_transformers.TFPreTrainedModel`,
- ``config``: an instance of the relevant subclass of :class:`~pytorch_transformers.PretrainedConfig`,
- ``path``: a path (string) to the PyTorch checkpoint.
- ``base_model_prefix``: a string indicating the attribute associated with the base model in derived classes of the same architecture that add modules on top of the base model.
(An illustrative, hypothetical subclass sketch using these attributes is given after the ``from_pretrained`` method below.)
"""
config_class = None
pretrained_model_archive_map = {}
load_pt_weights = lambda model, config, path: None
base_model_prefix = ""
def __init__(self, config, *inputs, **kwargs):
super(TFPreTrainedModel, self).__init__()
if not isinstance(config, PretrainedConfig):
raise ValueError(
"Parameter config in `{}(config)` should be an instance of class `PretrainedConfig`. "
"To create a model from a pretrained model use "
"`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format(
self.__class__.__name__, self.__class__.__name__
))
# Save config in model
self.config = config
def _get_resized_embeddings(self, old_embeddings, new_num_tokens=None):
""" Build a resized Embedding Module from a provided token Embedding Module.
Increasing the size will add newly initialized vectors at the end
Reducing the size will remove vectors from the end
Args:
new_num_tokens: (`optional`) int
New number of tokens in the embedding matrix.
Increasing the size will add newly initialized vectors at the end
Reducing the size will remove vectors from the end
If not provided or None: return the provided token Embedding Module.
Return: ``tf.keras.layers.Embedding``
Pointer to the resized Embedding Module or the old Embedding Module if new_num_tokens is None
"""
raise NotImplementedError
def _tie_or_clone_weights(self, first_module, second_module):
""" Tie or clone module weights depending of weither we are using TorchScript or not
"""
raise NotImplementedError
def resize_token_embeddings(self, new_num_tokens=None):
""" Resize input token embeddings matrix of the model if new_num_tokens != config.vocab_size.
Takes care of tying the embedding weights afterwards if the model class has a `tie_weights()` method.
Arguments:
new_num_tokens: (`optional`) int:
New number of tokens in the embedding matrix. Increasing the size will add newly initialized vectors at the end. Reducing the size will remove vectors from the end.
If not provided or None: does nothing and just returns a pointer to the input tokens ``tf.keras.layers.Embedding`` module of the model.
Return: ``tf.keras.layers.Embedding``
Pointer to the input tokens Embeddings Module of the model
"""
raise NotImplementedError
def prune_heads(self, heads_to_prune):
""" Prunes heads of the base model.
Arguments:
heads_to_prune: dict with keys being selected layer indices (`int`) and associated values being the list of heads to prune in said layer (list of `int`).
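For example, ``{1: [0, 2], 2: [2, 3]}`` will prune heads 0 and 2 on layer 1 and heads 2 and 3 on layer 2.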
"""
raise NotImplementedError
def save_pretrained(self, save_directory):
""" Save a model and its configuration file to a directory, so that it
can be re-loaded using the :func:`~pytorch_transformers.TFPreTrainedModel.from_pretrained` class method.
"""
raise NotImplementedError
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
r"""Instantiate a pretrained pytorch model from a pre-trained model configuration.
The model is set in evaluation mode by default using ``model.eval()`` (Dropout modules are deactivated)
To train the model, you should first set it back in training mode with ``model.train()``
The warning ``Weights from XXX not initialized from pretrained model`` means that the weights of XXX do not come pre-trained with the rest of the model.
It is up to you to train those weights with a downstream fine-tuning task.
The warning ``Weights from XXX not used in YYY`` means that the layer XXX is not used by YYY, therefore those weights are discarded.
Parameters:
pretrained_model_name_or_path: either:
- a string with the `shortcut name` of a pre-trained model to load from cache or download, e.g.: ``bert-base-uncased``.
- a path to a `directory` containing model weights saved using :func:`~pytorch_transformers.PreTrainedModel.save_pretrained`, e.g.: ``./my_model_directory/``.
- a path or url to a `PyTorch state_dict save file` (e.g. `./pt_model/pytorch_model.bin`). In this case, ``from_pt`` should be set to True and a configuration object should be provided as the ``config`` argument. This loading path is slower than converting the PyTorch checkpoint to a TensorFlow model using the provided conversion scripts and loading the TensorFlow model afterwards.
model_args: (`optional`) Sequence of positional arguments:
All remaining positional arguments will be passed to the underlying model's ``__init__`` method.
config: (`optional`) instance of a class derived from :class:`~pytorch_transformers.PretrainedConfig`:
Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
- the model is a model provided by the library (loaded with the ``shortcut-name`` string of a pretrained model), or
- the model was saved using :func:`~pytorch_transformers.PreTrainedModel.save_pretrained` and is reloaded by supplying the save directory.
- the model is loaded by supplying a local directory as ``pretrained_model_name_or_path`` and a configuration JSON file named `config.json` is found in the directory.
from_pt: (`optional`) boolean, default False:
Load the model weights from a PyTorch state_dict save file (see docstring of pretrained_model_name_or_path argument).
cache_dir: (`optional`) string:
Path to a directory in which a downloaded pre-trained model
configuration should be cached if the standard cache should not be used.
force_download: (`optional`) boolean, default False:
Force a (re-)download of the model weights and configuration files, overriding the cached versions if they exist.
proxies: (`optional`) dict, default None:
A dictionary of proxy servers to use by protocol or endpoint, e.g.: {'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}.
The proxies are used on each request.
output_loading_info: (`optional`) boolean:
Set to ``True`` to also return a dictionary containing missing keys, unexpected keys and error messages.
kwargs: (`optional`) Remaining dictionary of keyword arguments:
Can be used to update the configuration object (after it has been loaded) and to initialize the model (e.g. ``output_attentions=True``). Behaves differently depending on whether a `config` is provided or automatically loaded:
- If a configuration is provided with ``config``, ``**kwargs`` will be directly passed to the underlying model's ``__init__`` method (we assume all relevant updates to the configuration have already been done)
- If a configuration is not provided, ``kwargs`` will be first passed to the configuration class initialization function (:func:`~pytorch_transformers.PretrainedConfig.from_pretrained`). Each key of ``kwargs`` that corresponds to a configuration attribute will be used to override said attribute with the supplied ``kwargs`` value. Remaining keys that do not correspond to any configuration attribute will be passed to the underlying model's ``__init__`` function.
Examples::
model = TFBertModel.from_pretrained('bert-base-uncased') # Download model and configuration from S3 and cache.
model = TFBertModel.from_pretrained('./test/saved_model/') # E.g. model was saved using `save_pretrained('./test/saved_model/')`
model = TFBertModel.from_pretrained('bert-base-uncased', output_attentions=True) # Update configuration during loading
assert model.config.output_attentions == True
# Loading from a PyTorch checkpoint file instead of a TF 2.0 model (slower)
config = BertConfig.from_json_file('./pt_model/my_pt_model_config.json')
model = TFBertModel.from_pretrained('./pt_model/pytorch_model.bin', from_pt=True, config=config)
"""
config = kwargs.pop('config', None)
cache_dir = kwargs.pop('cache_dir', None)
from_pt = kwargs.pop('from_pt', False)
force_download = kwargs.pop('force_download', False)
proxies = kwargs.pop('proxies', None)
output_loading_info = kwargs.pop('output_loading_info', False)
# Load config
if config is None:
config, model_kwargs = cls.config_class.from_pretrained(
pretrained_model_name_or_path, *model_args,
cache_dir=cache_dir, return_unused_kwargs=True,
force_download=force_download,
**kwargs
)
else:
model_kwargs = kwargs
# Load model
if pretrained_model_name_or_path in cls.pretrained_model_archive_map:
archive_file = cls.pretrained_model_archive_map[pretrained_model_name_or_path]
elif os.path.isdir(pretrained_model_name_or_path):
if from_pt:
# Load from a PyTorch checkpoint
archive_file = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME)
else:
archive_file = os.path.join(pretrained_model_name_or_path, TF_WEIGHTS_NAME)
else:
archive_file = pretrained_model_name_or_path
# redirect to the cache, if necessary
try:
resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir, force_download=force_download, proxies=proxies)
except EnvironmentError:
if pretrained_model_name_or_path in cls.pretrained_model_archive_map:
logger.error(
"Couldn't reach server at '{}' to download pretrained weights.".format(
archive_file))
else:
logger.error(
"Model name '{}' was not found in model name list ({}). "
"We assumed '{}' was a path or url but couldn't find any file "
"associated to this path or url.".format(
pretrained_model_name_or_path,
', '.join(cls.pretrained_model_archive_map.keys()),
archive_file))
return None
if resolved_archive_file == archive_file:
logger.info("loading weights file {}".format(archive_file))
else:
logger.info("loading weights file {} from cache at {}".format(
archive_file, resolved_archive_file))
# Instantiate model.
model = cls(config, *model_args, **model_kwargs)
if from_pt:
# Load from a PyTorch checkpoint
return cls.load_pt_weights(model, config, resolved_archive_file)
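# Keras subclassed models create their variables lazily on the first call,
# so we run a forward pass on dummy inputs below to build all the weights
# before load_weights() restores them from the HDF5 file.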
inputs = tf.constant([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]])
ret = model(inputs, training=False) # build the network with dummy inputs
# 'by_name' allows us to do transfer learning by skipping/adding layers
# see https://github.com/tensorflow/tensorflow/blob/00fad90125b18b80fe054de1055770cfb8fe4ba3/tensorflow/python/keras/engine/network.py#L1339-L1357
model.load_weights(resolved_archive_file, by_name=True)
ret = model(inputs, training=False) # Make sure restore ops are run
# if hasattr(model, 'tie_weights'):
# model.tie_weights() # TODO make sure word embedding weights are still tied
if output_loading_info:
# TODO: missing/unexpected keys are not tracked yet when loading Keras HDF5 weights by name
loading_info = {"missing_keys": [], "unexpected_keys": [], "error_msgs": []}
return model, loading_info
return model
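As a reading aid (not part of the commit), here is a minimal hypothetical sketch of how a derived class fills in the class attributes documented above; ``MyConfig``, ``load_my_pt_weights`` and the archive map contents are placeholder names, not real library symbols:

class TFMyModel(TFPreTrainedModel):
    # hypothetical subclass illustrating the attributes used by from_pretrained()
    config_class = MyConfig                 # placeholder PretrainedConfig subclass for this architecture
    pretrained_model_archive_map = {}       # shortcut-name (string) -> pretrained weights URL (string)
    load_pt_weights = load_my_pt_weights    # placeholder function loading a PyTorch checkpoint into this TF model
    base_model_prefix = "my_model"          # attribute name of the base model in head classes

    def __init__(self, config, *inputs, **kwargs):
        super(TFMyModel, self).__init__(config, *inputs, **kwargs)
        # build the tf.keras layers from `config` here

# usage sketch: resolve the shortcut name or path, download/cache the weights and restore them
# model = TFMyModel.from_pretrained('my-model-shortcut-name')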
# coding=utf-8
# Copyright 2019 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import os
import shutil
import json
import random
import uuid
import unittest
import logging
import tensorflow as tf
from pytorch_transformers import TFPreTrainedModel
# from pytorch_transformers.modeling_bert import BertModel, BertConfig, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
def _config_zero_init(config):
configs_no_init = copy.deepcopy(config)
for key in configs_no_init.__dict__.keys():
if '_range' in key or '_std' in key:
setattr(configs_no_init, key, 0.0)
return configs_no_init
class TFCommonTestCases:
class TFCommonModelTester(unittest.TestCase):
model_tester = None
all_model_classes = ()
test_torchscript = True
test_pruning = True
test_resize_embeddings = True
def test_initialization(self):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
# configs_no_init = _config_zero_init(config)
# for model_class in self.all_model_classes:
# model = model_class(config=configs_no_init)
# for name, param in model.named_parameters():
# if param.requires_grad:
# self.assertIn(param.data.mean().item(), [0.0, 1.0],
# msg="Parameter {} of model {} seems not properly initialized".format(name, model_class))
def test_attention_outputs(self):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
# for model_class in self.all_model_classes:
# config.output_attentions = True
# config.output_hidden_states = False
# model = model_class(config)
# model.eval()
# outputs = model(**inputs_dict)
# attentions = outputs[-1]
# self.assertEqual(model.config.output_attentions, True)
# self.assertEqual(model.config.output_hidden_states, False)
# self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
# self.assertListEqual(
# list(attentions[0].shape[-3:]),
# [self.model_tester.num_attention_heads,
# self.model_tester.seq_length,
# self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
# out_len = len(outputs)
# # Check attention is always last and order is fine
# config.output_attentions = True
# config.output_hidden_states = True
# model = model_class(config)
# model.eval()
# outputs = model(**inputs_dict)
# self.assertEqual(out_len+1, len(outputs))
# self.assertEqual(model.config.output_attentions, True)
# self.assertEqual(model.config.output_hidden_states, True)
# attentions = outputs[-1]
# self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
# self.assertListEqual(
# list(attentions[0].shape[-3:]),
# [self.model_tester.num_attention_heads,
# self.model_tester.seq_length,
# self.model_tester.key_len if hasattr(self.model_tester, 'key_len') else self.model_tester.seq_length])
def test_headmasking(self):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
# config.output_attentions = True
# config.output_hidden_states = True
# configs_no_init = _config_zero_init(config) # To be sure we have no Nan
# for model_class in self.all_model_classes:
# model = model_class(config=configs_no_init)
# model.eval()
# # Prepare head_mask
# # Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
# head_mask = torch.ones(self.model_tester.num_hidden_layers, self.model_tester.num_attention_heads)
# head_mask[0, 0] = 0
# head_mask[-1, :-1] = 0
# head_mask.requires_grad_(requires_grad=True)
# inputs = inputs_dict.copy()
# inputs['head_mask'] = head_mask
# outputs = model(**inputs)
# # Test that we can get a gradient back for importance score computation
# output = sum(t.sum() for t in outputs[0])
# output = output.sum()
# output.backward()
# multihead_outputs = head_mask.grad
# attentions = outputs[-1]
# hidden_states = outputs[-2]
# # Remove Nan
# self.assertIsNotNone(multihead_outputs)
# self.assertEqual(len(multihead_outputs), self.model_tester.num_hidden_layers)
# self.assertAlmostEqual(
# attentions[0][..., 0, :, :].flatten().sum().item(), 0.0)
# self.assertNotEqual(
# attentions[0][..., -1, :, :].flatten().sum().item(), 0.0)
# self.assertNotEqual(
# attentions[1][..., 0, :, :].flatten().sum().item(), 0.0)
# self.assertAlmostEqual(
# attentions[-1][..., -2, :, :].flatten().sum().item(), 0.0)
# self.assertNotEqual(
# attentions[-1][..., -1, :, :].flatten().sum().item(), 0.0)
def test_head_pruning(self):
pass
# if not self.test_pruning:
# return
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
# for model_class in self.all_model_classes:
# config.output_attentions = True
# config.output_hidden_states = False
# model = model_class(config=config)
# model.eval()
# heads_to_prune = {0: list(range(1, self.model_tester.num_attention_heads)),
# -1: [0]}
# model.prune_heads(heads_to_prune)
# outputs = model(**inputs_dict)
# attentions = outputs[-1]
# self.assertEqual(
# attentions[0].shape[-3], 1)
# self.assertEqual(
# attentions[1].shape[-3], self.model_tester.num_attention_heads)
# self.assertEqual(
# attentions[-1].shape[-3], self.model_tester.num_attention_heads - 1)
def test_hidden_states_output(self):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
# for model_class in self.all_model_classes:
# config.output_hidden_states = True
# config.output_attentions = False
# model = model_class(config)
# model.eval()
# outputs = model(**inputs_dict)
# hidden_states = outputs[-1]
# self.assertEqual(model.config.output_attentions, False)
# self.assertEqual(model.config.output_hidden_states, True)
# self.assertEqual(len(hidden_states), self.model_tester.num_hidden_layers + 1)
# self.assertListEqual(
# list(hidden_states[0].shape[-2:]),
# [self.model_tester.seq_length, self.model_tester.hidden_size])
def test_resize_tokens_embeddings(self):
pass
# original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
# if not self.test_resize_embeddings:
# return
# for model_class in self.all_model_classes:
# config = copy.deepcopy(original_config)
# model = model_class(config)
# model_vocab_size = config.vocab_size
# # Retrieve the embeddings and clone them
# model_embed = model.resize_token_embeddings(model_vocab_size)
# cloned_embeddings = model_embed.weight.clone()
# # Check that resizing the token embeddings with a larger vocab size increases the model's vocab size
# model_embed = model.resize_token_embeddings(model_vocab_size + 10)
# self.assertEqual(model.config.vocab_size, model_vocab_size + 10)
# # Check that it actually resizes the embeddings matrix
# self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] + 10)
# # Check that resizing the token embeddings with a smaller vocab size decreases the model's vocab size
# model_embed = model.resize_token_embeddings(model_vocab_size - 15)
# self.assertEqual(model.config.vocab_size, model_vocab_size - 15)
# # Check that it actually resizes the embeddings matrix
# self.assertEqual(model_embed.weight.shape[0], cloned_embeddings.shape[0] - 15)
# # Check that adding and removing tokens has not modified the first part of the embedding matrix.
# models_equal = True
# for p1, p2 in zip(cloned_embeddings, model_embed.weight):
# if p1.data.ne(p2.data).sum() > 0:
# models_equal = False
# self.assertTrue(models_equal)
def test_tie_model_weights(self):
pass
# config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
# def check_same_values(layer_1, layer_2):
# equal = True
# for p1, p2 in zip(layer_1.weight, layer_2.weight):
# if p1.data.ne(p2.data).sum() > 0:
# equal = False
# return equal
# for model_class in self.all_model_classes:
# if not hasattr(model_class, 'tie_weights'):
# continue
# config.torchscript = True
# model_not_tied = model_class(config)
# params_not_tied = list(model_not_tied.parameters())
# config_tied = copy.deepcopy(config)
# config_tied.torchscript = False
# model_tied = model_class(config_tied)
# params_tied = list(model_tied.parameters())
# # Check that the embedding layer and decoding layer are the same in size and in value
# self.assertGreater(len(params_not_tied), len(params_tied))
# # Check that after resize they remain tied.
# model_tied.resize_token_embeddings(config.vocab_size + 10)
# params_tied_2 = list(model_tied.parameters())
# self.assertGreater(len(params_not_tied), len(params_tied))
# self.assertEqual(len(params_tied_2), len(params_tied))
def ids_tensor(shape, vocab_size, rng=None, name=None):
"""Creates a random int32 tensor of the shape within the vocab size."""
if rng is None:
rng = random.Random()
total_dims = 1
for dim in shape:
total_dims *= dim
values = []
for _ in range(total_dims):
values.append(rng.randint(0, vocab_size - 1))
return tf.constant(values, shape=shape)
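# Usage sketch (hypothetical values): ids_tensor([2, 3], vocab_size=10) returns a
# (2, 3) int32 tf.Tensor of random token ids in [0, 10), used as dummy input_ids in the tests below.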
class TFModelUtilsTest(unittest.TestCase):
def test_model_from_pretrained(self):
pass
# logging.basicConfig(level=logging.INFO)
# for model_name in list(BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
# config = BertConfig.from_pretrained(model_name)
# self.assertIsNotNone(config)
# self.assertIsInstance(config, PretrainedConfig)
# model = BertModel.from_pretrained(model_name)
# model, loading_info = BertModel.from_pretrained(model_name, output_loading_info=True)
# self.assertIsNotNone(model)
# self.assertIsInstance(model, PreTrainedModel)
# for value in loading_info.values():
# self.assertEqual(len(value), 0)
# config = BertConfig.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
# model = BertModel.from_pretrained(model_name, output_attentions=True, output_hidden_states=True)
# self.assertEqual(model.config.output_attentions, True)
# self.assertEqual(model.config.output_hidden_states, True)
# self.assertEqual(model.config, config)
if __name__ == "__main__":
unittest.main()
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import shutil
import pytest
import tensorflow as tf
from pytorch_transformers import (BertConfig)
from pytorch_transformers.modeling_tf_bert import TFBertModel, TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP
from .modeling_tf_common_test import (TFCommonTestCases, ids_tensor)
from .configuration_common_test import ConfigTester
class TFBertModelTest(TFCommonTestCases.TFCommonModelTester):
all_model_classes = (TFBertModel,)
# BertForMaskedLM, BertForNextSentencePrediction,
# BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification,
# BertForTokenClassification)
class TFBertModelTester(object):
def __init__(self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_input_mask=True,
use_token_type_ids=True,
use_labels=True,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=512,
type_vocab_size=16,
type_sequence_label_size=2,
initializer_range=0.02,
num_labels=3,
num_choices=4,
scope=None,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_input_mask = use_input_mask
self.use_token_type_ids = use_token_type_ids
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.type_vocab_size = type_vocab_size
self.type_sequence_label_size = type_sequence_label_size
self.initializer_range = initializer_range
self.num_labels = num_labels
self.num_choices = num_choices
self.scope = scope
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
input_mask = None
if self.use_input_mask:
input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
token_type_ids = None
if self.use_token_type_ids:
token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)
sequence_labels = None
token_labels = None
choice_labels = None
if self.use_labels:
sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
choice_labels = ids_tensor([self.batch_size], self.num_choices)
config = BertConfig(
vocab_size_or_config_json_file=self.vocab_size,
hidden_size=self.hidden_size,
num_hidden_layers=self.num_hidden_layers,
num_attention_heads=self.num_attention_heads,
intermediate_size=self.intermediate_size,
hidden_act=self.hidden_act,
hidden_dropout_prob=self.hidden_dropout_prob,
attention_probs_dropout_prob=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
type_vocab_size=self.type_vocab_size,
initializer_range=self.initializer_range)
return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def check_loss_output(self, result):
self.parent.assertListEqual(
list(result["loss"].size()),
[])
def create_and_check_bert_model(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
model = TFBertModel(config=config)
# model.eval()
inputs = {'input_ids': input_ids,
'attention_mask': input_mask,
'token_type_ids': token_type_ids}
sequence_output, pooled_output = model(inputs)
inputs = [input_ids, input_mask]
sequence_output, pooled_output = model(inputs)
sequence_output, pooled_output = model(input_ids)
result = {
"sequence_output": sequence_output.numpy(),
"pooled_output": pooled_output.numpy(),
}
self.parent.assertListEqual(
list(result["sequence_output"].shape),
[self.batch_size, self.seq_length, self.hidden_size])
self.parent.assertListEqual(list(result["pooled_output"].shape), [self.batch_size, self.hidden_size])
def create_and_check_bert_for_masked_lm(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
pass
# model = BertForMaskedLM(config=config)
# model.eval()
# loss, prediction_scores = model(input_ids, token_type_ids, input_mask, token_labels)
# result = {
# "loss": loss,
# "prediction_scores": prediction_scores,
# }
# self.parent.assertListEqual(
# list(result["prediction_scores"].size()),
# [self.batch_size, self.seq_length, self.vocab_size])
# self.check_loss_output(result)
def create_and_check_bert_for_next_sequence_prediction(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
pass
# model = BertForNextSentencePrediction(config=config)
# model.eval()
# loss, seq_relationship_score = model(input_ids, token_type_ids, input_mask, sequence_labels)
# result = {
# "loss": loss,
# "seq_relationship_score": seq_relationship_score,
# }
# self.parent.assertListEqual(
# list(result["seq_relationship_score"].size()),
# [self.batch_size, 2])
# self.check_loss_output(result)
def create_and_check_bert_for_pretraining(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
pass
# model = BertForPreTraining(config=config)
# model.eval()
# loss, prediction_scores, seq_relationship_score = model(input_ids, token_type_ids, input_mask, token_labels, sequence_labels)
# result = {
# "loss": loss,
# "prediction_scores": prediction_scores,
# "seq_relationship_score": seq_relationship_score,
# }
# self.parent.assertListEqual(
# list(result["prediction_scores"].size()),
# [self.batch_size, self.seq_length, self.vocab_size])
# self.parent.assertListEqual(
# list(result["seq_relationship_score"].size()),
# [self.batch_size, 2])
# self.check_loss_output(result)
def create_and_check_bert_for_question_answering(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
pass
# model = BertForQuestionAnswering(config=config)
# model.eval()
# loss, start_logits, end_logits = model(input_ids, token_type_ids, input_mask, sequence_labels, sequence_labels)
# result = {
# "loss": loss,
# "start_logits": start_logits,
# "end_logits": end_logits,
# }
# self.parent.assertListEqual(
# list(result["start_logits"].size()),
# [self.batch_size, self.seq_length])
# self.parent.assertListEqual(
# list(result["end_logits"].size()),
# [self.batch_size, self.seq_length])
# self.check_loss_output(result)
def create_and_check_bert_for_sequence_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
pass
# config.num_labels = self.num_labels
# model = BertForSequenceClassification(config)
# model.eval()
# loss, logits = model(input_ids, token_type_ids, input_mask, sequence_labels)
# result = {
# "loss": loss,
# "logits": logits,
# }
# self.parent.assertListEqual(
# list(result["logits"].size()),
# [self.batch_size, self.num_labels])
# self.check_loss_output(result)
def create_and_check_bert_for_token_classification(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
pass
# config.num_labels = self.num_labels
# model = BertForTokenClassification(config=config)
# model.eval()
# loss, logits = model(input_ids, token_type_ids, input_mask, token_labels)
# result = {
# "loss": loss,
# "logits": logits,
# }
# self.parent.assertListEqual(
# list(result["logits"].size()),
# [self.batch_size, self.seq_length, self.num_labels])
# self.check_loss_output(result)
def create_and_check_bert_for_multiple_choice(self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels):
pass
# config.num_choices = self.num_choices
# model = BertForMultipleChoice(config=config)
# model.eval()
# multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
# multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
# multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
# loss, logits = model(multiple_choice_inputs_ids,
# multiple_choice_token_type_ids,
# multiple_choice_input_mask,
# choice_labels)
# result = {
# "loss": loss,
# "logits": logits,
# }
# self.parent.assertListEqual(
# list(result["logits"].size()),
# [self.batch_size, self.num_choices])
# self.check_loss_output(result)
def prepare_config_and_inputs_for_common(self):
config_and_inputs = self.prepare_config_and_inputs()
(config, input_ids, token_type_ids, input_mask,
sequence_labels, token_labels, choice_labels) = config_and_inputs
inputs_dict = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': input_mask}
return config, inputs_dict
def setUp(self):
self.model_tester = TFBertModelTest.TFBertModelTester(self)
self.config_tester = ConfigTester(self, config_class=BertConfig, hidden_size=37)
def test_config(self):
self.config_tester.run_common_tests()
def test_bert_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bert_model(*config_and_inputs)
def test_for_masked_lm(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bert_for_masked_lm(*config_and_inputs)
def test_for_multiple_choice(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)
def test_for_next_sequence_prediction(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)
def test_for_pretraining(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bert_for_pretraining(*config_and_inputs)
def test_for_question_answering(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bert_for_question_answering(*config_and_inputs)
def test_for_sequence_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bert_for_sequence_classification(*config_and_inputs)
def test_for_token_classification(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_bert_for_token_classification(*config_and_inputs)
@pytest.mark.slow
def test_model_from_pretrained(self):
cache_dir = "/tmp/pytorch_transformers_test/"
for model_name in list(TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
model = TFBertModel.from_pretrained(model_name, cache_dir=cache_dir)
shutil.rmtree(cache_dir)
self.assertIsNotNone(model)
if __name__ == "__main__":
unittest.main()