Unverified Commit cbe63949 authored by Patrick von Platen, committed by GitHub

Model Templates for Seq2Seq (#9251)

* adapt cookie cutter

* fix copy-paste statement

* delete copy statements for now

* remove unused import from template

* make doc rst

* correct config docstring

* correct training

* correct inputs processing tf enc dec

* make style

* adapt templates

* clean tabs

* correct tensor -> Tensor naming

* correct indent

* correct templates

* fix the test

* break lines to avoid > 119

* Apply suggestions from code review
parent e6c1f1ca
@@ -40,6 +40,8 @@ jobs:
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/standalone.json --path=templates/adding_a_new_model
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-encoder-bert-tokenizer.json --path=templates/adding_a_new_model
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/tf-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
transformers-cli add-new-model --testing --testing_file=templates/adding_a_new_model/tests/pt-seq-2-seq-bart-tokenizer.json --path=templates/adding_a_new_model
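The two new `--testing_file` entries exercise the seq2seq flavour of the template end to end in CI. As a rough, hedged sketch, such a testing JSON simply pre-answers the cookiecutter prompts; the field names below mirror the `cookiecutter.json` keys touched later in this diff, while the concrete values (and any keys not visible here, such as the derived model-name variants) are illustrative only:

```python
# Hypothetical contents of tests/pt-seq-2-seq-bart-tokenizer.json, shown as a Python
# dict for readability -- the real file is plain JSON.
seq2seq_testing_config = {
    "modelname": "TemplatePTSeq2Seq",                          # illustrative; the real test file may differ
    "authors": "The HuggingFace Team",
    "checkpoint_identifier": "fake-checkpoint/seq2seq-model",  # hypothetical checkpoint
    "tokenizer_type": "Based on BART",
    "generate_tensorflow_and_pytorch": "PyTorch",
    "is_encoder_decoder_model": "True",
}
```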
make style
python utils/check_table.py --fix_and_overwrite
python utils/check_dummies.py --fix_and_overwrite
...
@@ -1077,7 +1077,7 @@ class TFBartModel(TFBartPretrainedModel):
decoder_outputs = self.decoder(
inputs["decoder_input_ids"],
attention_mask=inputs["decoder_attention_mask"],
encoder_hidden_states=inputs["encoder_outputs"][0],
encoder_attention_mask=inputs["attention_mask"],
past_key_values=inputs["past_key_values"],
@@ -1228,6 +1228,7 @@ class TFBartForConditionalGeneration(TFBartPretrainedModel):
output_attentions=inputs["output_attentions"],
output_hidden_states=inputs["output_hidden_states"],
return_dict=inputs["return_dict"],
training=inputs["training"],
)
lm_logits = self.model.shared(outputs[0], mode="linear")
lm_logits = lm_logits + self.final_logits_bias
...
@@ -30,6 +30,7 @@ if is_tokenizers_available():
from .tokenization_{{cookiecutter.lowercase_modelname}}_fast import {{cookiecutter.camelcase_modelname}}TokenizerFast
{%- if (cookiecutter.generate_tensorflow_and_pytorch == "PyTorch & TensorFlow" or cookiecutter.generate_tensorflow_and_pytorch == "PyTorch") %}
{% if cookiecutter.is_encoder_decoder_model == "False" %}
if is_torch_available():
from .modeling_{{cookiecutter.lowercase_modelname}} import (
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
@@ -44,8 +45,20 @@ if is_torch_available():
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
)
{% else %}
if is_torch_available():
from .modeling_{{cookiecutter.lowercase_modelname}} import (
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
{{cookiecutter.camelcase_modelname}}Model,
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
)
{% endif %}
{% endif %}
{%- if (cookiecutter.generate_tensorflow_and_pytorch == "PyTorch & TensorFlow" or cookiecutter.generate_tensorflow_and_pytorch == "TensorFlow") %}
{% if cookiecutter.is_encoder_decoder_model == "False" %}
if is_tf_available():
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
@@ -59,4 +72,12 @@ if is_tf_available():
TF{{cookiecutter.camelcase_modelname}}Model,
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
)
{% else %}
if is_tf_available():
from .modeling_tf_{{cookiecutter.lowercase_modelname}} import (
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
TF{{cookiecutter.camelcase_modelname}}Model,
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
)
{% endif %}
{% endif %}
@@ -6,5 +6,6 @@
"authors": "{{cookiecutter.authors}}",
"checkpoint_identifier": "{{cookiecutter.checkpoint_identifier}}",
"tokenizer_type": "{{cookiecutter.tokenizer_type}}",
"generate_tensorflow_and_pytorch": "{{cookiecutter.generate_tensorflow_and_pytorch}}",
"is_encoder_decoder_model": ["True", "False"]
}
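Giving `is_encoder_decoder_model` a list value turns it into a multiple-choice prompt when the template is instantiated (the first entry, `"True"`, is the default). `transformers-cli add-new-model` drives cookiecutter for you, but as a hedged illustration of how such a choice can be pinned non-interactively with the plain cookiecutter API (path and override values here are hypothetical):

```python
from cookiecutter.main import cookiecutter

# Instantiate the model template without prompts; extra_context overrides the
# defaults declared in cookiecutter.json. Path and values are illustrative only.
cookiecutter(
    "templates/adding_a_new_model",
    no_input=True,
    extra_context={
        "modelname": "MySeq2SeqModel",
        "is_encoder_decoder_model": "True",
    },
)
```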
@@ -39,6 +39,7 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
Args:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
vocab_size (:obj:`int`, `optional`, defaults to 30522):
Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
:obj:`inputs_ids` passed when calling :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model` or
@@ -70,6 +71,50 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
layer_norm_eps (:obj:`float`, `optional`, defaults to 1e-12):
The epsilon used by the layer normalization layers.
{% else -%}
vocab_size (:obj:`int`, `optional`, defaults to 50265):
Vocabulary size of the {{cookiecutter.modelname}} model. Defines the number of different tokens that can be represented by the
:obj:`inputs_ids` passed when calling :class:`~transformers.{{cookiecutter.camelcase_modelname}}Model` or
:class:`~transformers.TF{{cookiecutter.camelcase_modelname}}Model`.
d_model (:obj:`int`, `optional`, defaults to 1024):
Dimensionality of the layers and the pooler layer.
encoder_layers (:obj:`int`, `optional`, defaults to 12):
Number of encoder layers.
decoder_layers (:obj:`int`, `optional`, defaults to 12):
Number of decoder layers.
encoder_attention_heads (:obj:`int`, `optional`, defaults to 16):
Number of attention heads for each attention layer in the Transformer encoder.
decoder_attention_heads (:obj:`int`, `optional`, defaults to 16):
Number of attention heads for each attention layer in the Transformer decoder.
decoder_ffn_dim (:obj:`int`, `optional`, defaults to 4096):
Dimensionality of the "intermediate" (often named feed-forward) layer in decoder.
encoder_ffn_dim (:obj:`int`, `optional`, defaults to 4096):
Dimensionality of the "intermediate" (often named feed-forward) layer in encoder.
activation_function (:obj:`str` or :obj:`function`, `optional`, defaults to :obj:`"gelu"`):
The non-linear activation function (function or string) in the encoder and pooler. If string,
:obj:`"gelu"`, :obj:`"relu"`, :obj:`"silu"` and :obj:`"gelu_new"` are supported.
dropout (:obj:`float`, `optional`, defaults to 0.1):
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
attention_dropout (:obj:`float`, `optional`, defaults to 0.0):
The dropout ratio for the attention probabilities.
activation_dropout (:obj:`float`, `optional`, defaults to 0.0):
The dropout ratio for activations inside the fully connected layer.
classifier_dropout (:obj:`float`, `optional`, defaults to 0.0):
The dropout ratio for the classifier.
max_position_embeddings (:obj:`int`, `optional`, defaults to 1024):
The maximum sequence length that this model might ever be used with. Typically set this to something large
just in case (e.g., 512 or 1024 or 2048).
init_std (:obj:`float`, `optional`, defaults to 0.02):
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
encoder_layerdrop (:obj:`float`, `optional`, defaults to 0.0):
The LayerDrop probability for the encoder. See the `LayerDrop paper
<https://arxiv.org/abs/1909.11556>`__ for more details.
decoder_layerdrop (:obj:`float`, `optional`, defaults to 0.0):
The LayerDrop probability for the decoder. See the `LayerDrop paper
<https://arxiv.org/abs/1909.11556>`__ for more details.
use_cache (:obj:`bool`, `optional`, defaults to :obj:`True`):
Whether or not the model should return the last key/values attentions (not used by all models).
{% endif -%}
Example::
@@ -88,9 +133,9 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
def __init__(
self,
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
vocab_size=30522,
hidden_size=768,
num_hidden_layers=12,
num_attention_heads=12,
intermediate_size=3072,
@@ -101,6 +146,29 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
type_vocab_size=2,
initializer_range=0.02,
layer_norm_eps=1e-12,
is_encoder_decoder=False,
{% else -%}
vocab_size=50265,
max_position_embeddings=1024,
encoder_layers=12,
encoder_ffn_dim=4096,
encoder_attention_heads=16,
decoder_layers=12,
decoder_ffn_dim=4096,
decoder_attention_heads=16,
encoder_layerdrop=0.0,
decoder_layerdrop=0.0,
use_cache=True,
is_encoder_decoder=True,
activation_function="gelu",
d_model=1024,
dropout=0.1,
attention_dropout=0.0,
activation_dropout=0.0,
init_std=0.02,
decoder_start_token_id=2,
classifier_dropout=0.0,
{% endif -%}
pad_token_id=1,
bos_token_id=0,
eos_token_id=2,
@@ -108,13 +176,19 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
):
super().__init__(
pad_token_id=pad_token_id,
bos_token_id=bos_token_id,
eos_token_id=eos_token_id,
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
{% else -%}
is_encoder_decoder=is_encoder_decoder,
decoder_start_token_id=decoder_start_token_id,
{% endif -%}
**kwargs
)
self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
@@ -122,8 +196,36 @@ class {{cookiecutter.camelcase_modelname}}Config(PretrainedConfig):
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.initializer_range = initializer_range
self.type_vocab_size = type_vocab_size
self.layer_norm_eps = layer_norm_eps
{% else -%}
self.d_model = d_model
self.encoder_ffn_dim = encoder_ffn_dim
self.encoder_layers = encoder_layers
self.encoder_attention_heads = encoder_attention_heads
self.decoder_ffn_dim = decoder_ffn_dim
self.decoder_layers = decoder_layers
self.decoder_attention_heads = decoder_attention_heads
self.dropout = dropout
self.attention_dropout = attention_dropout
self.activation_dropout = activation_dropout
self.activation_function = activation_function
self.init_std = init_std
self.encoder_layerdrop = encoder_layerdrop
self.decoder_layerdrop = decoder_layerdrop
self.classifier_dropout = classifier_dropout
self.use_cache = use_cache
self.num_hidden_layers = encoder_layers
{% endif -%}
{% if cookiecutter.is_encoder_decoder_model == "False" %}
{%- else %}
@property
def num_attention_heads(self) -> int:
return self.encoder_attention_heads
@property
def hidden_size(self) -> int:
return self.d_model
{%- endif %}
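Because the seq2seq branch stores its sizes under the BART-style names (`d_model`, `encoder_attention_heads`, ...), the two read-only properties above keep code that expects the encoder-only attribute names working. A hedged sketch of the generated config in use, with `BartConfig` standing in for the templated config class:

```python
from transformers import BartConfig  # stand-in for the generated seq2seq config class

config = BartConfig(
    vocab_size=50265,
    d_model=1024,
    encoder_layers=12,
    decoder_layers=12,
    encoder_attention_heads=16,
    decoder_attention_heads=16,
)

# The properties expose the encoder-only naming convention on top of the seq2seq fields.
assert config.hidden_size == config.d_model                          # 1024
assert config.num_attention_heads == config.encoder_attention_heads  # 16
```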
@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
{% if cookiecutter.is_encoder_decoder_model == "False" %}
import unittest
@@ -318,3 +319,272 @@ class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCa
]
)
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
{% else %}
import unittest
from transformers import {{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}Tokenizer, is_tf_available
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_tf, slow
from .test_configuration_common import ConfigTester
from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
if is_tf_available():
import tensorflow as tf
from transformers import TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration, TF{{cookiecutter.camelcase_modelname}}Model
@require_tf
class TF{{cookiecutter.camelcase_modelname}}ModelTester:
config_cls = {{cookiecutter.camelcase_modelname}}Config
config_updates = {}
hidden_act = "gelu"
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_labels=False,
vocab_size=99,
hidden_size=32,
num_hidden_layers=5,
num_attention_heads=4,
intermediate_size=37,
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=20,
eos_token_id=2,
pad_token_id=1,
bos_token_id=0,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.eos_token_id = eos_token_id
self.pad_token_id = pad_token_id
self.bos_token_id = bos_token_id
def prepare_config_and_inputs_for_common(self):
input_ids = ids_tensor([self.batch_size, self.seq_length - 1], self.vocab_size)
eos_tensor = tf.expand_dims(tf.constant([self.eos_token_id] * self.batch_size), 1)
input_ids = tf.concat([input_ids, eos_tensor], axis=1)
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
config = self.config_cls(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
encoder_layers=self.num_hidden_layers,
decoder_layers=self.num_hidden_layers,
encoder_attention_heads=self.num_attention_heads,
decoder_attention_heads=self.num_attention_heads,
encoder_ffn_dim=self.intermediate_size,
decoder_ffn_dim=self.intermediate_size,
dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
eos_token_id=self.eos_token_id,
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
decoder_start_token_id=self.pad_token_id,
**self.config_updates,
)
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict
def check_decoder_model_past_large_inputs(self, config, inputs_dict):
model = TF{{cookiecutter.camelcase_modelname}}Model(config=config).get_decoder()
input_ids = inputs_dict["input_ids"]
input_ids = input_ids[:1, :]
attention_mask = inputs_dict["attention_mask"][:1, :]
self.batch_size = 1
# first forward pass
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
output, past_key_values = outputs.to_tuple()
past_key_values = past_key_values[1]
# create hypothetical next token and extent to next_input_ids
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
next_attn_mask = tf.cast(ids_tensor((self.batch_size, 3), 2), tf.int8)
# append to next input_ids and
next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
next_attention_mask = tf.concat([attention_mask, next_attn_mask], axis=-1)
output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)[0]
output_from_past = model(next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values)[0]
self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])
# select random slice
random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
output_from_past_slice = output_from_past[:, :, random_slice_idx]
# test that outputs are equal for slice
tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
config,
input_ids,
decoder_input_ids,
attention_mask=None,
decoder_attention_mask=None,
):
if attention_mask is None:
attention_mask = tf.cast(tf.math.not_equal(input_ids, config.pad_token_id), tf.int8)
if decoder_attention_mask is None:
decoder_attention_mask = tf.cast(tf.math.not_equal(decoder_input_ids, config.pad_token_id), tf.int8)
return {
"input_ids": input_ids,
"decoder_input_ids": decoder_input_ids,
"attention_mask": attention_mask,
"decoder_attention_mask": decoder_attention_mask,
}
@require_tf
class TF{{cookiecutter.camelcase_modelname}}ModelTest(TFModelTesterMixin, unittest.TestCase):
all_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration, TF{{cookiecutter.camelcase_modelname}}Model) if is_tf_available() else ()
all_generative_model_classes = (TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_tf_available() else ()
is_encoder_decoder = True
test_pruning = False
def setUp(self):
self.model_tester = TF{{cookiecutter.camelcase_modelname}}ModelTester(self)
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
def test_config(self):
self.config_tester.run_common_tests()
def test_decoder_model_past_large_inputs(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
self.model_tester.check_decoder_model_past_large_inputs(*config_and_inputs)
def test_model_common_attributes(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in self.all_model_classes:
model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
x = model.get_output_layer_with_bias()
assert x is None
name = model.get_prefix_bias_name()
assert name is None
def _assert_tensors_equal(a, b, atol=1e-12, prefix=""):
"""If tensors are not close, or a and b aren't both tensors, raise a nice AssertionError."""
if a is None and b is None:
return True
try:
if tf.debugging.assert_near(a, b, atol=atol):
return True
raise
except Exception:
msg = "{} != {}".format(a, b)
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
def _long_tensor(tok_lst):
return tf.constant(tok_lst, dtype=tf.int32)
TOLERANCE = 1e-4
@slow
@require_sentencepiece
@require_tokenizers
@require_tf
class TF{{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase):
def test_inference_no_head(self):
model = TF{{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
# change to intended input here
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
output = model(**inputs_dict)[0]
expected_shape = (1, 11, 1024)
self.assertEqual(output.shape, expected_shape)
# change to expected output here
expected_slice = tf.constant(
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
)
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
def test_inference_with_head(self):
model = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
# change to intended input here
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
output = model(**inputs_dict)[0]
expected_shape = (1, 11, 1024)
self.assertEqual(output.shape, expected_shape)
# change to expected output here
expected_slice = tf.constant(
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]],
)
tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=TOLERANCE)
def test_seq_to_seq_generation(self):
hf = TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
batch_input = [
# string 1,
# string 2,
# string 3,
# string 4,
]
# The below article tests that we don't add any hypotheses outside of the top n_beams
dct = tok.batch_encode_plus(
batch_input,
max_length=512,
padding="max_length",
truncation_strategy="only_first",
truncation=True,
return_tensors="tf",
)
hypotheses_batch = hf.generate(
input_ids=dct["input_ids"],
attention_mask=dct["attention_mask"],
num_beams=2,
)
EXPECTED = [
# here expected 1,
# here expected 2,
# here expected 3,
# here expected 4,
]
generated = tok.batch_decode(
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
)
assert generated == EXPECTED
{%- endif %}
@@ -15,6 +15,7 @@
""" Testing suite for the PyTorch {{cookiecutter.modelname}} model. """
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
import unittest
from tests.test_modeling_common import floats_tensor
@@ -406,3 +407,395 @@ class {{cookiecutter.camelcase_modelname}}ModelIntegrationTest(unittest.TestCase
)
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
{% else -%}
import copy
import tempfile
import unittest
import timeout_decorator # noqa
from transformers import is_torch_available
from transformers.file_utils import cached_property
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
from .test_configuration_common import ConfigTester
from .test_generation_utils import GenerationTesterMixin
from .test_modeling_common import ModelTesterMixin, ids_tensor
if is_torch_available():
import torch
from transformers import (
{{cookiecutter.camelcase_modelname}}Config,
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
{{cookiecutter.camelcase_modelname}}Model,
{{cookiecutter.camelcase_modelname}}Tokenizer,
)
from transformers.models.{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
{{cookiecutter.camelcase_modelname}}Decoder,
{{cookiecutter.camelcase_modelname}}Encoder,
)
def prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(
config,
input_ids,
decoder_input_ids,
attention_mask=None,
decoder_attention_mask=None,
):
if attention_mask is None:
attention_mask = input_ids.ne(config.pad_token_id)
if decoder_attention_mask is None:
decoder_attention_mask = decoder_input_ids.ne(config.pad_token_id)
return {
"input_ids": input_ids,
"decoder_input_ids": decoder_input_ids,
"attention_mask": attention_mask,
"decoder_attention_mask": decoder_attention_mask,
}
@require_torch
class {{cookiecutter.camelcase_modelname}}ModelTester:
def __init__(
self,
parent,
batch_size=13,
seq_length=7,
is_training=True,
use_labels=False,
vocab_size=99,
hidden_size=16,
num_hidden_layers=2,
num_attention_heads=4,
intermediate_size=4,
hidden_act="gelu",
hidden_dropout_prob=0.1,
attention_probs_dropout_prob=0.1,
max_position_embeddings=20,
eos_token_id=2,
pad_token_id=1,
bos_token_id=0,
):
self.parent = parent
self.batch_size = batch_size
self.seq_length = seq_length
self.is_training = is_training
self.use_labels = use_labels
self.vocab_size = vocab_size
self.hidden_size = hidden_size
self.num_hidden_layers = num_hidden_layers
self.num_attention_heads = num_attention_heads
self.intermediate_size = intermediate_size
self.hidden_act = hidden_act
self.hidden_dropout_prob = hidden_dropout_prob
self.attention_probs_dropout_prob = attention_probs_dropout_prob
self.max_position_embeddings = max_position_embeddings
self.eos_token_id = eos_token_id
self.pad_token_id = pad_token_id
self.bos_token_id = bos_token_id
def prepare_config_and_inputs(self):
input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size).clamp(3)
input_ids[:, -1] = self.eos_token_id # Eos Token
decoder_input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)
config = {{cookiecutter.camelcase_modelname}}Config(
vocab_size=self.vocab_size,
d_model=self.hidden_size,
encoder_layers=self.num_hidden_layers,
decoder_layers=self.num_hidden_layers,
encoder_attention_heads=self.num_attention_heads,
decoder_attention_heads=self.num_attention_heads,
encoder_ffn_dim=self.intermediate_size,
decoder_ffn_dim=self.intermediate_size,
dropout=self.hidden_dropout_prob,
attention_dropout=self.attention_probs_dropout_prob,
max_position_embeddings=self.max_position_embeddings,
eos_token_id=self.eos_token_id,
bos_token_id=self.bos_token_id,
pad_token_id=self.pad_token_id,
)
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(config, input_ids, decoder_input_ids)
return config, inputs_dict
def prepare_config_and_inputs_for_common(self):
config, inputs_dict = self.prepare_config_and_inputs()
return config, inputs_dict
def create_and_check_decoder_model_past_large_inputs(self, config, inputs_dict):
model = {{cookiecutter.camelcase_modelname}}Model(config=config).get_decoder().to(torch_device).eval()
input_ids = inputs_dict["input_ids"]
attention_mask = inputs_dict["attention_mask"]
# first forward pass
outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)
output, past_key_values = outputs.to_tuple()
# create hypothetical multiple next token and extent to next_input_ids
next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
next_attn_mask = ids_tensor((self.batch_size, 3), 2)
# append to next input_ids and
next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
next_attention_mask = torch.cat([attention_mask, next_attn_mask], dim=-1)
output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)["last_hidden_state"]
output_from_past = model(next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values)["last_hidden_state"]
# select random slice
random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()
self.parent.assertTrue(output_from_past_slice.shape[1] == next_tokens.shape[1])
# test that outputs are equal for slice
self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-2))
def check_encoder_decoder_model_standalone(self, config, inputs_dict):
model = {{cookiecutter.camelcase_modelname}}Model(config=config).to(torch_device).eval()
outputs = model(**inputs_dict)
encoder_last_hidden_state = outputs.encoder_last_hidden_state
last_hidden_state = outputs.last_hidden_state
with tempfile.TemporaryDirectory() as tmpdirname:
encoder = model.get_encoder()
encoder.save_pretrained(tmpdirname)
encoder = {{cookiecutter.camelcase_modelname}}Encoder.from_pretrained(tmpdirname).to(torch_device)
encoder_last_hidden_state_2 = encoder(inputs_dict["input_ids"], attention_mask=inputs_dict["attention_mask"])[
0
]
self.parent.assertTrue((encoder_last_hidden_state_2 - encoder_last_hidden_state).abs().max().item() < 1e-3)
with tempfile.TemporaryDirectory() as tmpdirname:
decoder = model.get_decoder()
decoder.save_pretrained(tmpdirname)
decoder = {{cookiecutter.camelcase_modelname}}Decoder.from_pretrained(tmpdirname).to(torch_device)
last_hidden_state_2 = decoder(
input_ids=inputs_dict["decoder_input_ids"],
attention_mask=inputs_dict["decoder_attention_mask"],
encoder_hidden_states=encoder_last_hidden_state,
encoder_attention_mask=inputs_dict["attention_mask"],
)[0]
self.parent.assertTrue((last_hidden_state_2 - last_hidden_state).abs().max().item() < 1e-3)
@require_torch
class {{cookiecutter.camelcase_modelname}}ModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
all_model_classes = (
({{cookiecutter.camelcase_modelname}}Model, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration, {{cookiecutter.camelcase_modelname}}ForSequenceClassification, {{cookiecutter.camelcase_modelname}}ForQuestionAnswering)
if is_torch_available()
else ()
)
all_generative_model_classes = ({{cookiecutter.camelcase_modelname}}ForConditionalGeneration,) if is_torch_available() else ()
is_encoder_decoder = True
test_pruning = False
test_head_masking = False
test_missing_keys = False
def setUp(self):
self.model_tester = {{cookiecutter.camelcase_modelname}}ModelTester(self)
self.config_tester = ConfigTester(self, config_class={{cookiecutter.camelcase_modelname}}Config)
def test_config(self):
self.config_tester.run_common_tests()
def test_initialization_more(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
model = {{cookiecutter.camelcase_modelname}}Model(config)
model.to(torch_device)
model.eval()
# test init
self.assertTrue((model.encoder.embed_tokens.weight == model.shared.weight).all().item())
def _check_var(module):
"""Check that we initialized various parameters from N(0, config.init_std)."""
self.assertAlmostEqual(torch.std(module.weight).item(), config.init_std, 2)
_check_var(model.encoder.embed_tokens)
_check_var(model.encoder.layers[0].self_attn.k_proj)
_check_var(model.encoder.layers[0].fc1)
_check_var(model.encoder.embed_positions)
def test_save_load_strict(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs()
for model_class in self.all_model_classes:
model = model_class(config)
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname)
model2, info = model_class.from_pretrained(tmpdirname, output_loading_info=True)
self.assertEqual(info["missing_keys"], [])
def test_decoder_model_past_with_large_inputs(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)
def test_encoder_decoder_model_standalone(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
# {{cookiecutter.camelcase_modelname}}ForSequenceClassification does not support inputs_embeds
def test_inputs_embeds(self):
config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
for model_class in ({{cookiecutter.camelcase_modelname}}Model, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration, {{cookiecutter.camelcase_modelname}}ForQuestionAnswering):
model = model_class(config)
model.to(torch_device)
model.eval()
inputs = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))
if not self.is_encoder_decoder:
input_ids = inputs["input_ids"]
del inputs["input_ids"]
else:
encoder_input_ids = inputs["input_ids"]
decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids)
del inputs["input_ids"]
inputs.pop("decoder_input_ids", None)
wte = model.get_input_embeddings()
if not self.is_encoder_decoder:
inputs["inputs_embeds"] = wte(input_ids)
else:
inputs["inputs_embeds"] = wte(encoder_input_ids)
inputs["decoder_inputs_embeds"] = wte(decoder_input_ids)
with torch.no_grad():
model(**inputs)[0]
def test_generate_fp16(self):
config, input_dict = self.model_tester.prepare_config_and_inputs()
input_ids = input_dict["input_ids"]
attention_mask = input_ids.ne(1).to(torch_device)
model = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration(config).eval().to(torch_device)
if torch_device == "cuda":
model.half()
model.generate(input_ids, attention_mask=attention_mask)
model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
def assert_tensors_close(a, b, atol=1e-12, prefix=""):
"""If tensors have different shapes, different values or a and b are not both tensors, raise a nice Assertion error."""
if a is None and b is None:
return True
try:
if torch.allclose(a, b, atol=atol):
return True
raise
except Exception:
pct_different = (torch.gt((a - b).abs(), atol)).float().mean().item()
if a.numel() > 100:
msg = f"tensor values are {pct_different:.1%} percent different."
else:
msg = f"{a} != {b}"
if prefix:
msg = prefix + ": " + msg
raise AssertionError(msg)
def _long_tensor(tok_lst):
return torch.tensor(tok_lst, dtype=torch.long, device=torch_device)
TOLERANCE = 1e-4
@require_torch
@require_sentencepiece
@require_tokenizers
@slow
class {{cookiecutter.camelcase_modelname}}ModelIntegrationTests(unittest.TestCase):
@cached_property
def default_tokenizer(self):
return {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
def test_inference_no_head(self):
model = {{cookiecutter.camelcase_modelname}}Model.from_pretrained('{{cookiecutter.checkpoint_identifier}}').to(torch_device)
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
decoder_input_ids = _long_tensor([[2, 0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588]])
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
with torch.no_grad():
output = model(**inputs_dict)[0]
expected_shape = torch.Size((1, 11, 1024))
self.assertEqual(output.shape, expected_shape)
# change to expected output here
expected_slice = torch.tensor(
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]], device=torch_device
)
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=TOLERANCE))
def test_inference_head(self):
model = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}').to(torch_device)
# change to intended input
input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
decoder_input_ids = _long_tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
inputs_dict = prepare_{{cookiecutter.lowercase_modelname}}_inputs_dict(model.config, input_ids, decoder_input_ids)
with torch.no_grad():
output = model(**inputs_dict)[0]
expected_shape = torch.Size((1, 11, model.config.vocab_size))
self.assertEqual(output.shape, expected_shape)
# change to expected output here
expected_slice = torch.tensor(
[[0.7144, 0.8143, -1.2813], [0.7144, 0.8143, -1.2813], [-0.0467, 2.5911, -2.1845]], device=torch_device
)
self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=TOLERANCE))
def test_seq_to_seq_generation(self):
hf = {{cookiecutter.camelcase_modelname}}ForConditionalGeneration.from_pretrained('{{cookiecutter.checkpoint_identifier}}').to(torch_device)
tok = {{cookiecutter.camelcase_modelname}}Tokenizer.from_pretrained('{{cookiecutter.checkpoint_identifier}}')
batch_input = [
# string 1,
# string 2,
# string 3,
# string 4,
]
# The below article tests that we don't add any hypotheses outside of the top n_beams
dct = tok.batch_encode_plus(
batch_input,
max_length=512,
padding="max_length",
truncation_strategy="only_first",
truncation=True,
return_tensors="pt",
)
hypotheses_batch = hf.generate(
input_ids=dct["input_ids"].to(torch_device),
attention_mask=dct["attention_mask"].to(torch_device),
num_beams=2,
)
EXPECTED = [
# here expected 1,
# here expected 2,
# here expected 3,
# here expected 4,
]
generated = tok.batch_decode(
hypotheses_batch.tolist(), clean_up_tokenization_spaces=True, skip_special_tokens=True
)
assert generated == EXPECTED
{% endif -%}
@@ -28,6 +28,7 @@
# To replace in: "src/transformers/__init__.py"
# Below: "if is_torch_available():" if generating PyTorch
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" %}
from .models.{{cookiecutter.lowercase_modelname}} import (
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
@@ -41,10 +42,20 @@
{{cookiecutter.camelcase_modelname}}PreTrainedModel,
load_tf_weights_in_{{cookiecutter.lowercase_modelname}},
)
{% else %}
from .models.{{cookiecutter.lowercase_modelname}} import (
{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
{{cookiecutter.camelcase_modelname}}Model,
)
{% endif -%}
# End.
# Below: "if is_tf_available():" if generating TensorFlow
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" %}
from .models.{{cookiecutter.lowercase_modelname}} import (
TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST,
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
@@ -57,11 +68,19 @@
TF{{cookiecutter.camelcase_modelname}}Model,
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
)
{% else %}
from .models.{{cookiecutter.lowercase_modelname}} import (
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
TF{{cookiecutter.camelcase_modelname}}Model,
TF{{cookiecutter.camelcase_modelname}}PreTrainedModel,
)
{% endif -%}
# End.
# Below: "if is_tokenizers_available():"
# Replace with:
from .models.{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}TokenizerFast
# End.
# Below: "from .models.albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, AlbertConfig"
# Replace with:
@@ -101,6 +120,7 @@ from ..{{cookiecutter.lowercase_modelname}}.configuration_{{cookiecutter.lowerca
# Below: "# Add modeling imports here"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
{{cookiecutter.camelcase_modelname}}ForMaskedLM,
{{cookiecutter.camelcase_modelname}}ForCausalLM,
@@ -110,6 +130,14 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
{{cookiecutter.camelcase_modelname}}ForTokenClassification,
{{cookiecutter.camelcase_modelname}}Model,
)
{% else -%}
from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_modelname}} import (
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering,
{{cookiecutter.camelcase_modelname}}ForSequenceClassification,
{{cookiecutter.camelcase_modelname}}Model,
)
{% endif -%}
# End.
# Below: "# Base model mapping"
@@ -119,17 +147,27 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
# Below: "# Model with LM heads mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
{% else %}
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
{% endif -%}
# End.
# Below: "# Model for Causal LM mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForCausalLM),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Masked LM mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMaskedLM),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Sequence Classification mapping"
@@ -144,14 +182,27 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
# Below: "# Model for Token Classification mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForTokenClassification),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Multiple Choice mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForMultipleChoice),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Seq2Seq Causal LM mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
{% else %}
({{cookiecutter.camelcase_modelname}}Config, {{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
{% endif -%}
# End.
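With the mapping entry above in place, the generated model is picked up by the auto classes alongside the other seq2seq models. A hedged sketch of what that enables once a checkpoint exists (the checkpoint name is hypothetical):

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

checkpoint = "username/my-new-seq2seq-model"  # hypothetical checkpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# Standard encoder-decoder generation loop via the auto classes.
inputs = tokenizer("Summarize or translate me.", return_tensors="pt")
generated_ids = model.generate(**inputs, num_beams=2, max_length=32)
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True))
```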
# To replace in: "src/transformers/models/auto/modeling_tf_auto.py" if generating TensorFlow
# Below: "from .configuration_auto import ("
@@ -161,6 +212,7 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_{{cookiecutter.lowercase_mo
# Below: "# Add modeling imports here"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase_modelname}} import (
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM,
TF{{cookiecutter.camelcase_modelname}}ForCausalLM,
@@ -170,6 +222,12 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase
TF{{cookiecutter.camelcase_modelname}}ForTokenClassification,
TF{{cookiecutter.camelcase_modelname}}Model,
)
{% else -%}
from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase_modelname}} import (
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration,
TF{{cookiecutter.camelcase_modelname}}Model,
)
{% endif -%}
# End.
# Below: "# Base model mapping"
@@ -179,35 +237,65 @@ from ..{{cookiecutter.lowercase_modelname}}.modeling_tf_{{cookiecutter.lowercase
# Below: "# Model with LM heads mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
{% else %}
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
{% endif -%}
# End.
# Below: "# Model for Causal LM mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForCausalLM),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Masked LM mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMaskedLM),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Sequence Classification mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Question Answering mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Token Classification mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForTokenClassification),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Multiple Choice mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice),
{% else -%}
{% endif -%}
# End.
# Below: "# Model for Seq2Seq Causal LM mapping"
# Replace with:
{% if cookiecutter.is_encoder_decoder_model == "False" -%}
{% else %}
({{cookiecutter.camelcase_modelname}}Config, TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration),
{% endif -%}
# End.
@@ -56,6 +56,47 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(BertTokenizerFast):
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
from ...utils import logging
from ..bart.tokenization_bart_fast import BartTokenizerFast
from .tokenization_{{cookiecutter.lowercase_modelname}} import {{cookiecutter.camelcase_modelname}}Tokenizer
logger = logging.get_logger(__name__)
PRETRAINED_VOCAB_FILES_MAP = {
"vocab_file": {
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.json",
},
"merges_file": {
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/merges.txt",
},
"tokenizer_file": {
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/tokenizer.json",
},
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"{{cookiecutter.checkpoint_identifier}}": 1024,
}
class {{cookiecutter.camelcase_modelname}}TokenizerFast(BartTokenizerFast):
r"""
Construct a "fast" {{cookiecutter.modelname}} tokenizer (backed by HuggingFace's `tokenizers` library).
:class:`~transformers.{{cookiecutter.camelcase_modelname}}TokenizerFast` is identical to :class:`~transformers.BartTokenizerFast` and runs
end-to-end tokenization, based on byte-level Byte-Pair-Encoding.
Refer to superclass :class:`~transformers.BartTokenizerFast` for usage examples and documentation concerning
parameters.
"""
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
slow_tokenizer_class = {{cookiecutter.camelcase_modelname}}Tokenizer
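Since the class above only re-points the pretrained file maps, its behaviour is exactly that of `BartTokenizerFast`; a short, hedged usage sketch with BART standing in for the generated tokenizer (checkpoint name illustrative):

```python
from transformers import BartTokenizerFast  # stand-in for the generated TokenizerFast class

tok = BartTokenizerFast.from_pretrained("facebook/bart-large")  # hypothetical checkpoint choice
enc = tok("Hello world!")
print(enc["input_ids"])
print(tok.convert_ids_to_tokens(enc["input_ids"]))
```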
{%- elif cookiecutter.tokenizer_type == "Standalone" %}
from typing import List, Optional
...
@@ -54,6 +54,45 @@ class {{cookiecutter.camelcase_modelname}}Tokenizer(BertTokenizer):
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
pretrained_init_configuration = PRETRAINED_INIT_CONFIGURATION
{%- elif cookiecutter.tokenizer_type == "Based on BART" %}
from ...utils import logging
from ..bart.tokenization_bart import BartTokenizer
logger = logging.get_logger(__name__)
PRETRAINED_VOCAB_FILES_MAP = {
"vocab_file": {
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/vocab.json",
},
"merges_file": {
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/merges.txt",
},
"tokenizer_file": {
"{{cookiecutter.checkpoint_identifier}}": "https://huggingface.co/{{cookiecutter.checkpoint_identifier}}/resolve/main/tokenizer.json",
},
}
PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"{{cookiecutter.checkpoint_identifier}}": 1024,
}
class {{cookiecutter.camelcase_modelname}}Tokenizer(BartTokenizer):
"""
Construct a {{cookiecutter.modelname}} tokenizer.
:class:`~transformers.{{cookiecutter.camelcase_modelname}}Tokenizer` is identical to :class:`~transformers.BartTokenizer` and runs end-to-end
tokenization based on byte-level Byte-Pair-Encoding.
Refer to superclass :class:`~transformers.BartTokenizer` for usage examples and documentation concerning
parameters.
"""
pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
{%- elif cookiecutter.tokenizer_type == "Standalone" %} {%- elif cookiecutter.tokenizer_type == "Standalone" %}
from typing import List, Optional from typing import List, Optional
...@@ -289,5 +328,4 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast) ...@@ -289,5 +328,4 @@ class {{cookiecutter.camelcase_modelname}}TokenizerFast(PreTrainedTokenizerFast)
return len(cls + token_ids_0 + sep) * [0] return len(cls + token_ids_0 + sep) * [0]
return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0] return len(cls + token_ids_0 + sep + sep + token_ids_1 + sep) * [0]
{% endif %} {% endif %}
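The two "Based on BART" branches above only subclass the existing BART tokenizers, so a rendered tokenizer behaves exactly like its parent. A minimal sketch of that behaviour, using BartTokenizerFast directly as a stand-in for the generated class (the checkpoint name is illustrative, not part of these templates):

# Sketch only: the generated <ModelName>TokenizerFast is identical to
# BartTokenizerFast, so the parent class shows the same behaviour.
from transformers import BartTokenizerFast

tokenizer = BartTokenizerFast.from_pretrained("facebook/bart-base")  # illustrative checkpoint
encoding = tokenizer("Hello world!")
print(encoding["input_ids"])                      # byte-level BPE token ids
print(tokenizer.decode(encoding["input_ids"]))    # "<s>Hello world!</s>"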
...@@ -57,7 +57,7 @@ Tips: ...@@ -57,7 +57,7 @@ Tips:
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}Model .. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}Model
:members: forward :members: forward
{% if cookiecutter.is_encoder_decoder_model == "False" %}
{{cookiecutter.camelcase_modelname}}ForCausalLM {{cookiecutter.camelcase_modelname}}ForCausalLM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...@@ -83,7 +83,7 @@ Tips: ...@@ -83,7 +83,7 @@ Tips:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForMultipleChoice .. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForMultipleChoice
:members: :members: forward
{{cookiecutter.camelcase_modelname}}ForTokenClassification {{cookiecutter.camelcase_modelname}}ForTokenClassification
...@@ -99,6 +99,29 @@ Tips: ...@@ -99,6 +99,29 @@ Tips:
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForQuestionAnswering .. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
:members: forward :members: forward
{%- else %}
{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
:members: forward
{{cookiecutter.camelcase_modelname}}ForSequenceClassification
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForSequenceClassification
:members: forward
{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
:members: forward
{% endif -%}
{% endif -%} {% endif -%}
{% if "TensorFlow" in cookiecutter.generate_tensorflow_and_pytorch -%} {% if "TensorFlow" in cookiecutter.generate_tensorflow_and_pytorch -%}
...@@ -108,7 +131,7 @@ TF{{cookiecutter.camelcase_modelname}}Model ...@@ -108,7 +131,7 @@ TF{{cookiecutter.camelcase_modelname}}Model
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}Model .. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}Model
:members: call :members: call
{% if cookiecutter.is_encoder_decoder_model == "False" %}
TF{{cookiecutter.camelcase_modelname}}ForMaskedLM TF{{cookiecutter.camelcase_modelname}}ForMaskedLM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
...@@ -120,7 +143,7 @@ TF{{cookiecutter.camelcase_modelname}}ForCausalLM ...@@ -120,7 +143,7 @@ TF{{cookiecutter.camelcase_modelname}}ForCausalLM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForCausalLM .. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForCausalLM
:members: forward :members: call
TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification TF{{cookiecutter.camelcase_modelname}}ForSequenceClassification
...@@ -151,4 +174,11 @@ TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering ...@@ -151,4 +174,11 @@ TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering
:members: call :members: call
{%- else %}
TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration
:members: call
{% endif -%}
{% endif -%} {% endif -%}
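In the new encoder-decoder branch of the docs template, the documented ForConditionalGeneration head is the seq2seq generation entry point. A minimal, hypothetical usage sketch with BART as a stand-in for the generated class; the checkpoint, prompt, and generation settings are illustrative only:

# Sketch using BartForConditionalGeneration in place of the generated
# {{cookiecutter.camelcase_modelname}}ForConditionalGeneration class.
from transformers import BartForConditionalGeneration, BartTokenizer

tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")

inputs = tokenizer("UN Chief Says There Is No Plan to Stop War", return_tensors="pt")
generated_ids = model.generate(inputs["input_ids"], num_beams=4, max_length=20)
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))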
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
"camelcase_modelname": "BrandNewBert", "camelcase_modelname": "BrandNewBert",
"authors": "The HuggingFace Team", "authors": "The HuggingFace Team",
"checkpoint_identifier": "brand-new-bert-base-cased", "checkpoint_identifier": "brand-new-bert-base-cased",
"tokenizer_type": ["Based on BERT", "Standalone"], "tokenizer_type": ["Based on BERT", "Based on BART", "Standalone"],
"generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"] "generate_tensorflow_and_pytorch": ["PyTorch & TensorFlow", "PyTorch", "TensorFlow"],
} "is_encoder_decoder_model": ["True", "False"]
\ No newline at end of file }
...@@ -6,5 +6,6 @@ ...@@ -6,5 +6,6 @@
"authors": "The HuggingFace Team", "authors": "The HuggingFace Team",
"checkpoint_identifier": "brand-new-bert-base-cased", "checkpoint_identifier": "brand-new-bert-base-cased",
"tokenizer_type": "Based on BERT", "tokenizer_type": "Based on BERT",
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow" "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow",
"is_encoder_decoder_model": "False"
} }
...@@ -6,5 +6,6 @@ ...@@ -6,5 +6,6 @@
"authors": "The HuggingFace Team", "authors": "The HuggingFace Team",
"checkpoint_identifier": "brand-new-bert-base-cased", "checkpoint_identifier": "brand-new-bert-base-cased",
"tokenizer_type": "Based on BERT", "tokenizer_type": "Based on BERT",
"generate_tensorflow_and_pytorch": "PyTorch" "generate_tensorflow_and_pytorch": "PyTorch",
"is_encoder_decoder_model": "False"
} }
{
"modelname": "NewENCDEC",
"uppercase_modelname": "NEW_ENC_DEC",
"lowercase_modelname": "new_enc_dec",
"camelcase_modelname": "NewEncDec",
"authors": "The HuggingFace Team",
"checkpoint_identifier": "new-enc-dec-base",
"tokenizer_type": "Based on BART",
"generate_tensorflow_and_pytorch": "PyTorch",
"is_encoder_decoder_model": "True"
}
...@@ -6,5 +6,6 @@ ...@@ -6,5 +6,6 @@
"authors": "The HuggingFace Team", "authors": "The HuggingFace Team",
"checkpoint_identifier": "bi-brand-new-bert-base-cased", "checkpoint_identifier": "bi-brand-new-bert-base-cased",
"tokenizer_type": "Standalone", "tokenizer_type": "Standalone",
"generate_tensorflow_and_pytorch": "PyTorch & TensorFlow" "generate_tensorflow_and_pytorch": "PyTorch & TensorFlow",
"is_encoder_decoder_model": "False"
} }
...@@ -6,5 +6,6 @@ ...@@ -6,5 +6,6 @@
"authors": "The HuggingFace Team", "authors": "The HuggingFace Team",
"checkpoint_identifier": "brand-new-bert-base-cased", "checkpoint_identifier": "brand-new-bert-base-cased",
"tokenizer_type": "Based on BERT", "tokenizer_type": "Based on BERT",
"generate_tensorflow_and_pytorch": "TensorFlow" "generate_tensorflow_and_pytorch": "TensorFlow",
"is_encoder_decoder_model": "False"
} }
{
"modelname": "NewTFENCDEC",
"uppercase_modelname": "NEW_TF_ENC_DEC",
"lowercase_modelname": "new_tf_enc_dec",
"camelcase_modelname": "NewTFEncDec",
"authors": "The HuggingFace Team",
"checkpoint_identifier": "new-tf-enc-dec-base",
"tokenizer_type": "Based on BART",
"generate_tensorflow_and_pytorch": "TensorFlow",
"is_encoder_decoder_model": "True"
}
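These JSON fixtures provide the cookiecutter context used to render the templates above: each value fills the matching {{cookiecutter.*}} placeholder and drives the {% if %}/{% elif %} branches. A small sketch (assuming jinja2 is installed; the template string below is illustrative, not copied from the repository) of how the NewENCDEC context resolves a placeholder and selects the BART branch:

from jinja2 import Template

# Values taken from the NewENCDEC fixture above; note that the flags are the
# strings "True"/"False", not booleans, so templates compare against strings.
context = {
    "camelcase_modelname": "NewEncDec",
    "lowercase_modelname": "new_enc_dec",
    "tokenizer_type": "Based on BART",
    "is_encoder_decoder_model": "True",
}

# Illustrative template line written in the same style as the files above.
line = Template(
    "class {{cookiecutter.camelcase_modelname}}Tokenizer("
    "{% if cookiecutter.tokenizer_type == 'Based on BART' %}BartTokenizer"
    "{% else %}PreTrainedTokenizer{% endif %}):"
)
print(line.render(cookiecutter=context))
# -> class NewEncDecTokenizer(BartTokenizer):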