"examples/research_projects/adversarial/utils_hans.py" did not exist on "4731a00c3e23bb5c4dd8d0e43a95e777a8a8f02a"
Unverified commit 415e9a09, authored by Matt, committed by GitHub

Add tf_keras imports to prepare for Keras 3 (#28588)

* Port core files + ESM (because ESM code is odd)

* Search-replace in modelling code

* Fix up transfo_xl as well

* Fix other core files + tests (still need to add correct import to tests)

* Fix cookiecutter

* make fixup, fix imports in some more core files

* Auto-add imports to tests

* Cleanup, add imports to sagemaker tests

* Use correct exception for importing tf_keras

* Fixes in modeling_tf_utils

* make fixup

* Correct version parsing code

* Ensure the pipeline tests correctly revert to float32 after each test

* Ensure the pipeline tests correctly revert to float32 after each test

* More tf.keras -> keras

* Add dtype cast

* Better imports of tf_keras

* Add a cast for tf.assign, just in case

* Fix callback imports
parent 1d489b3e
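The mechanical rename in the hunks below is enabled by a single import shim: `modeling_tf_utils.py` exposes a `keras` name that every other file imports, resolving to the backwards-compatible `tf_keras` package when it is installed. A minimal sketch of that pattern, assuming `packaging` is used for the version check (the error text is illustrative, not the library's exact message):

from packaging.version import parse

try:
    import tf_keras as keras  # backwards-compatible Keras 2.x package
except (ModuleNotFoundError, ImportError):
    import keras  # fall back to the standalone keras package

    if parse(keras.__version__).major > 2:
        raise ValueError(
            "Keras 3 is installed, but this code path requires Keras 2. "
            "Install the backwards-compatible package with `pip install tf-keras`."
        )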
......@@ -46,6 +46,7 @@ from ...modeling_tf_utils import (
TFSequenceClassificationLoss,
TFTokenClassificationLoss,
get_initializer,
keras,
keras_serializable,
unpack_inputs,
)
......@@ -80,7 +81,7 @@ XLM_ROBERTA_START_DOCSTRING = r"""
library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)
This model is also a [tf.keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matters related to general usage and
behavior.
......@@ -162,7 +163,7 @@ XLM_ROBERTA_INPUTS_DOCSTRING = r"""
# Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaEmbeddings with Roberta->XLMRoberta
class TFXLMRobertaEmbeddings(tf.keras.layers.Layer):
class TFXLMRobertaEmbeddings(keras.layers.Layer):
"""
Same as BertEmbeddings with a tiny tweak for positional embeddings indexing.
"""
......@@ -175,8 +176,8 @@ class TFXLMRobertaEmbeddings(tf.keras.layers.Layer):
self.hidden_size = config.hidden_size
self.max_position_embeddings = config.max_position_embeddings
self.initializer_range = config.initializer_range
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.LayerNorm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
def build(self, input_shape=None):
with tf.name_scope("word_embeddings"):
......@@ -268,11 +269,11 @@ class TFXLMRobertaEmbeddings(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertPooler with Bert->XLMRoberta
class TFXLMRobertaPooler(tf.keras.layers.Layer):
class TFXLMRobertaPooler(keras.layers.Layer):
def __init__(self, config: XLMRobertaConfig, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
units=config.hidden_size,
kernel_initializer=get_initializer(config.initializer_range),
activation="tanh",
......@@ -298,7 +299,7 @@ class TFXLMRobertaPooler(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention with Bert->XLMRoberta
class TFXLMRobertaSelfAttention(tf.keras.layers.Layer):
class TFXLMRobertaSelfAttention(keras.layers.Layer):
def __init__(self, config: XLMRobertaConfig, **kwargs):
super().__init__(**kwargs)
......@@ -313,16 +314,16 @@ class TFXLMRobertaSelfAttention(tf.keras.layers.Layer):
self.all_head_size = self.num_attention_heads * self.attention_head_size
self.sqrt_att_head_size = math.sqrt(self.attention_head_size)
self.query = tf.keras.layers.Dense(
self.query = keras.layers.Dense(
units=self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="query"
)
self.key = tf.keras.layers.Dense(
self.key = keras.layers.Dense(
units=self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="key"
)
self.value = tf.keras.layers.Dense(
self.value = keras.layers.Dense(
units=self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="value"
)
self.dropout = tf.keras.layers.Dropout(rate=config.attention_probs_dropout_prob)
self.dropout = keras.layers.Dropout(rate=config.attention_probs_dropout_prob)
self.is_decoder = config.is_decoder
self.config = config
......@@ -431,15 +432,15 @@ class TFXLMRobertaSelfAttention(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput with Bert->XLMRoberta
class TFXLMRobertaSelfOutput(tf.keras.layers.Layer):
class TFXLMRobertaSelfOutput(keras.layers.Layer):
def __init__(self, config: XLMRobertaConfig, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
units=config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.LayerNorm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.config = config
def call(self, hidden_states: tf.Tensor, input_tensor: tf.Tensor, training: bool = False) -> tf.Tensor:
......@@ -462,7 +463,7 @@ class TFXLMRobertaSelfOutput(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertAttention with Bert->XLMRoberta
class TFXLMRobertaAttention(tf.keras.layers.Layer):
class TFXLMRobertaAttention(keras.layers.Layer):
def __init__(self, config: XLMRobertaConfig, **kwargs):
super().__init__(**kwargs)
......@@ -514,11 +515,11 @@ class TFXLMRobertaAttention(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertIntermediate with Bert->XLMRoberta
class TFXLMRobertaIntermediate(tf.keras.layers.Layer):
class TFXLMRobertaIntermediate(keras.layers.Layer):
def __init__(self, config: XLMRobertaConfig, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
units=config.intermediate_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
......@@ -544,15 +545,15 @@ class TFXLMRobertaIntermediate(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertOutput with Bert->XLMRoberta
class TFXLMRobertaOutput(tf.keras.layers.Layer):
class TFXLMRobertaOutput(keras.layers.Layer):
def __init__(self, config: XLMRobertaConfig, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
units=config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.LayerNorm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.config = config
def call(self, hidden_states: tf.Tensor, input_tensor: tf.Tensor, training: bool = False) -> tf.Tensor:
......@@ -575,7 +576,7 @@ class TFXLMRobertaOutput(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertLayer with Bert->XLMRoberta
class TFXLMRobertaLayer(tf.keras.layers.Layer):
class TFXLMRobertaLayer(keras.layers.Layer):
def __init__(self, config: XLMRobertaConfig, **kwargs):
super().__init__(**kwargs)
......@@ -679,7 +680,7 @@ class TFXLMRobertaLayer(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEncoder with Bert->XLMRoberta
class TFXLMRobertaEncoder(tf.keras.layers.Layer):
class TFXLMRobertaEncoder(keras.layers.Layer):
def __init__(self, config: XLMRobertaConfig, **kwargs):
super().__init__(**kwargs)
self.config = config
......@@ -759,7 +760,7 @@ class TFXLMRobertaEncoder(tf.keras.layers.Layer):
@keras_serializable
# Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaMainLayer with Roberta->XLMRoberta
class TFXLMRobertaMainLayer(tf.keras.layers.Layer):
class TFXLMRobertaMainLayer(keras.layers.Layer):
config_class = XLMRobertaConfig
def __init__(self, config, add_pooling_layer=True, **kwargs):
......@@ -779,7 +780,7 @@ class TFXLMRobertaMainLayer(tf.keras.layers.Layer):
self.embeddings = TFXLMRobertaEmbeddings(config, name="embeddings")
# Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.get_input_embeddings
def get_input_embeddings(self) -> tf.keras.layers.Layer:
def get_input_embeddings(self) -> keras.layers.Layer:
return self.embeddings
# Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.set_input_embeddings
......@@ -1063,7 +1064,7 @@ class TFXLMRobertaModel(TFXLMRobertaPreTrainedModel):
# Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaLMHead with Roberta->XLMRoberta
class TFXLMRobertaLMHead(tf.keras.layers.Layer):
class TFXLMRobertaLMHead(keras.layers.Layer):
"""XLMRoberta Head for masked language modeling."""
def __init__(self, config, input_embeddings, **kwargs):
......@@ -1071,10 +1072,10 @@ class TFXLMRobertaLMHead(tf.keras.layers.Layer):
self.config = config
self.hidden_size = config.hidden_size
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm")
self.layer_norm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm")
self.act = get_tf_activation("gelu")
# The output weights are the same as the input embeddings, but there is
......@@ -1352,12 +1353,12 @@ class TFXLMRobertaForCausalLM(TFXLMRobertaPreTrainedModel, TFCausalLanguageModel
# Copied from transformers.models.roberta.modeling_tf_roberta.TFRobertaClassificationHead with Roberta->XLMRoberta
class TFXLMRobertaClassificationHead(tf.keras.layers.Layer):
class TFXLMRobertaClassificationHead(keras.layers.Layer):
"""Head for sentence-level classification tasks."""
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
config.hidden_size,
kernel_initializer=get_initializer(config.initializer_range),
activation="tanh",
......@@ -1366,8 +1367,8 @@ class TFXLMRobertaClassificationHead(tf.keras.layers.Layer):
classifier_dropout = (
config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
)
self.dropout = tf.keras.layers.Dropout(classifier_dropout)
self.out_proj = tf.keras.layers.Dense(
self.dropout = keras.layers.Dropout(classifier_dropout)
self.out_proj = keras.layers.Dense(
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="out_proj"
)
self.config = config
......@@ -1497,8 +1498,8 @@ class TFXLMRobertaForMultipleChoice(TFXLMRobertaPreTrainedModel, TFMultipleChoic
super().__init__(config, *inputs, **kwargs)
self.roberta = TFXLMRobertaMainLayer(config, name="roberta")
self.dropout = tf.keras.layers.Dropout(config.hidden_dropout_prob)
self.classifier = tf.keras.layers.Dense(
self.dropout = keras.layers.Dropout(config.hidden_dropout_prob)
self.classifier = keras.layers.Dense(
1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)
self.config = config
......@@ -1606,8 +1607,8 @@ class TFXLMRobertaForTokenClassification(TFXLMRobertaPreTrainedModel, TFTokenCla
classifier_dropout = (
config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
)
self.dropout = tf.keras.layers.Dropout(classifier_dropout)
self.classifier = tf.keras.layers.Dense(
self.dropout = keras.layers.Dropout(classifier_dropout)
self.classifier = keras.layers.Dense(
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)
self.config = config
......@@ -1698,7 +1699,7 @@ class TFXLMRobertaForQuestionAnswering(TFXLMRobertaPreTrainedModel, TFQuestionAn
self.num_labels = config.num_labels
self.roberta = TFXLMRobertaMainLayer(config, add_pooling_layer=False, name="roberta")
self.qa_outputs = tf.keras.layers.Dense(
self.qa_outputs = keras.layers.Dense(
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
)
self.config = config
......
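With the shim in place, the modeling files change only their namespace: every `tf.keras.layers.X` becomes `keras.layers.X`, with `keras` imported from `...modeling_tf_utils`. A self-contained sketch of the resulting layer style (`TinyHead` is an illustrative class, not one from this diff):

import tensorflow as tf

try:
    import tf_keras as keras
except (ModuleNotFoundError, ImportError):
    from tensorflow import keras  # Keras 2 bundled with TF

class TinyHead(keras.layers.Layer):
    # Mirrors the Dense + LayerNorm + Dropout blocks rewritten above.
    def __init__(self, hidden_size: int, **kwargs):
        super().__init__(**kwargs)
        self.dense = keras.layers.Dense(hidden_size, name="dense")
        self.LayerNorm = keras.layers.LayerNormalization(epsilon=1e-12, name="LayerNorm")
        self.dropout = keras.layers.Dropout(rate=0.1)

    def call(self, hidden_states: tf.Tensor, training: bool = False) -> tf.Tensor:
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states, training=training)
        return self.LayerNorm(hidden_states)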
......@@ -39,6 +39,7 @@ from ...modeling_tf_utils import (
TFSharedEmbeddings,
TFTokenClassificationLoss,
get_initializer,
keras,
keras_serializable,
unpack_inputs,
)
......@@ -66,7 +67,7 @@ TF_XLNET_PRETRAINED_MODEL_ARCHIVE_LIST = [
]
class TFXLNetRelativeAttention(tf.keras.layers.Layer):
class TFXLNetRelativeAttention(keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
......@@ -83,8 +84,8 @@ class TFXLNetRelativeAttention(tf.keras.layers.Layer):
self.initializer_range = config.initializer_range
self.output_attentions = config.output_attentions
self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm")
self.dropout = tf.keras.layers.Dropout(config.dropout)
self.layer_norm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm")
self.dropout = keras.layers.Dropout(config.dropout)
self.config = config
def build(self, input_shape=None):
......@@ -336,17 +337,17 @@ class TFXLNetRelativeAttention(tf.keras.layers.Layer):
return outputs
class TFXLNetFeedForward(tf.keras.layers.Layer):
class TFXLNetFeedForward(keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm")
self.layer_1 = tf.keras.layers.Dense(
self.layer_norm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="layer_norm")
self.layer_1 = keras.layers.Dense(
config.d_inner, kernel_initializer=get_initializer(config.initializer_range), name="layer_1"
)
self.layer_2 = tf.keras.layers.Dense(
self.layer_2 = keras.layers.Dense(
config.d_model, kernel_initializer=get_initializer(config.initializer_range), name="layer_2"
)
self.dropout = tf.keras.layers.Dropout(config.dropout)
self.dropout = keras.layers.Dropout(config.dropout)
if isinstance(config.ff_activation, str):
self.activation_function = get_tf_activation(config.ff_activation)
else:
......@@ -378,12 +379,12 @@ class TFXLNetFeedForward(tf.keras.layers.Layer):
self.layer_2.build([None, None, self.config.d_inner])
class TFXLNetLayer(tf.keras.layers.Layer):
class TFXLNetLayer(keras.layers.Layer):
def __init__(self, config, **kwargs):
super().__init__(**kwargs)
self.rel_attn = TFXLNetRelativeAttention(config, name="rel_attn")
self.ff = TFXLNetFeedForward(config, name="ff")
self.dropout = tf.keras.layers.Dropout(config.dropout)
self.dropout = keras.layers.Dropout(config.dropout)
def call(
self,
......@@ -433,7 +434,7 @@ class TFXLNetLayer(tf.keras.layers.Layer):
self.ff.build(None)
class TFXLNetLMHead(tf.keras.layers.Layer):
class TFXLNetLMHead(keras.layers.Layer):
def __init__(self, config, input_embeddings, **kwargs):
super().__init__(**kwargs)
self.config = config
......@@ -466,7 +467,7 @@ class TFXLNetLMHead(tf.keras.layers.Layer):
@keras_serializable
class TFXLNetMainLayer(tf.keras.layers.Layer):
class TFXLNetMainLayer(keras.layers.Layer):
config_class = XLNetConfig
def __init__(self, config, **kwargs):
......@@ -492,7 +493,7 @@ class TFXLNetMainLayer(tf.keras.layers.Layer):
config.vocab_size, config.d_model, initializer_range=config.initializer_range, name="word_embedding"
)
self.layer = [TFXLNetLayer(config, name=f"layer_._{i}") for i in range(config.n_layer)]
self.dropout = tf.keras.layers.Dropout(config.dropout)
self.dropout = keras.layers.Dropout(config.dropout)
self.use_mems_eval = config.use_mems_eval
self.use_mems_train = config.use_mems_train
......@@ -1059,7 +1060,7 @@ XLNET_START_DOCSTRING = r"""
library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)
This model is also a [tf.keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use it
as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matters related to general usage and
behavior.
......@@ -1415,7 +1416,7 @@ class TFXLNetForSequenceClassification(TFXLNetPreTrainedModel, TFSequenceClassif
self.sequence_summary = TFSequenceSummary(
config, initializer_range=config.initializer_range, name="sequence_summary"
)
self.logits_proj = tf.keras.layers.Dense(
self.logits_proj = keras.layers.Dense(
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="logits_proj"
)
self.config = config
......@@ -1516,7 +1517,7 @@ class TFXLNetForMultipleChoice(TFXLNetPreTrainedModel, TFMultipleChoiceLoss):
self.sequence_summary = TFSequenceSummary(
config, initializer_range=config.initializer_range, name="sequence_summary"
)
self.logits_proj = tf.keras.layers.Dense(
self.logits_proj = keras.layers.Dense(
1, kernel_initializer=get_initializer(config.initializer_range), name="logits_proj"
)
self.config = config
......@@ -1630,7 +1631,7 @@ class TFXLNetForTokenClassification(TFXLNetPreTrainedModel, TFTokenClassificatio
self.num_labels = config.num_labels
self.transformer = TFXLNetMainLayer(config, name="transformer")
self.classifier = tf.keras.layers.Dense(
self.classifier = keras.layers.Dense(
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)
self.config = config
......@@ -1720,7 +1721,7 @@ class TFXLNetForQuestionAnsweringSimple(TFXLNetPreTrainedModel, TFQuestionAnswer
def __init__(self, config, *inputs, **kwargs):
super().__init__(config, *inputs, **kwargs)
self.transformer = TFXLNetMainLayer(config, name="transformer")
self.qa_outputs = tf.keras.layers.Dense(
self.qa_outputs = keras.layers.Dense(
config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
)
self.config = config
......
......@@ -22,12 +22,14 @@ import tensorflow as tf
try:
from tf_keras.optimizers.legacy import Adam
except (ImportError, ModuleNotFoundError):
from tensorflow.keras.optimizers.legacy import Adam
except ImportError:
from tensorflow.keras.optimizers import Adam
from .modeling_tf_utils import keras
class WarmUp(tf.keras.optimizers.schedules.LearningRateSchedule):
class WarmUp(keras.optimizers.schedules.LearningRateSchedule):
"""
Applies a warmup schedule on a given learning rate decay schedule.
......@@ -131,7 +133,7 @@ def create_optimizer(
applied to all parameters except bias and layer norm parameters.
"""
# Implements linear decay of the learning rate.
lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
lr_schedule = keras.optimizers.schedules.PolynomialDecay(
initial_learning_rate=init_lr,
decay_steps=num_train_steps - num_warmup_steps,
end_learning_rate=init_lr * min_lr_ratio,
......@@ -156,7 +158,7 @@ def create_optimizer(
include_in_weight_decay=include_in_weight_decay,
)
else:
optimizer = tf.keras.optimizers.Adam(
optimizer = keras.optimizers.Adam(
learning_rate=lr_schedule,
beta_1=adam_beta1,
beta_2=adam_beta2,
......@@ -180,7 +182,7 @@ class AdamWeightDecay(Adam):
to adding the square of the weights to the loss with plain (non-momentum) SGD.
Args:
learning_rate (`Union[float, tf.keras.optimizers.schedules.LearningRateSchedule]`, *optional*, defaults to 0.001):
learning_rate (`Union[float, keras.optimizers.schedules.LearningRateSchedule]`, *optional*, defaults to 0.001):
The learning rate to use or a schedule.
beta_1 (`float`, *optional*, defaults to 0.9):
The beta1 parameter in Adam, which is the exponential decay rate for the 1st momentum estimates.
......@@ -210,7 +212,7 @@ class AdamWeightDecay(Adam):
def __init__(
self,
learning_rate: Union[float, tf.keras.optimizers.schedules.LearningRateSchedule] = 0.001,
learning_rate: Union[float, keras.optimizers.schedules.LearningRateSchedule] = 0.001,
beta_1: float = 0.9,
beta_2: float = 0.999,
epsilon: float = 1e-7,
......
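The optimizer code keeps the same schedule logic and only swaps the namespace. A rough, self-contained sketch of the warmup-into-polynomial-decay pattern that `WarmUp` and `create_optimizer` implement (simplified, not the library's exact code):

import tensorflow as tf

try:
    import tf_keras as keras
except (ModuleNotFoundError, ImportError):
    from tensorflow import keras

class LinearWarmup(keras.optimizers.schedules.LearningRateSchedule):
    # Linear warmup, then hand off to a wrapped decay schedule.
    def __init__(self, peak_lr, warmup_steps, decay_schedule):
        self.peak_lr = peak_lr
        self.warmup_steps = warmup_steps
        self.decay_schedule = decay_schedule

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        warmup_lr = self.peak_lr * step / float(self.warmup_steps)
        return tf.cond(
            step < self.warmup_steps,
            lambda: warmup_lr,
            lambda: self.decay_schedule(step - self.warmup_steps),
        )

decay = keras.optimizers.schedules.PolynomialDecay(
    initial_learning_rate=5e-5, decay_steps=9_500, end_learning_rate=0.0
)
optimizer = keras.optimizers.Adam(learning_rate=LinearWarmup(5e-5, 500, decay))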
......@@ -25,6 +25,8 @@ logger = logging.get_logger(__name__)
if is_tf_available():
import tensorflow as tf
from .modeling_tf_utils import keras
@dataclass
class TFTrainingArguments(TrainingArguments):
......@@ -195,7 +197,7 @@ class TFTrainingArguments(TrainingArguments):
# Set to float16 at first
if self.fp16:
tf.keras.mixed_precision.set_global_policy("mixed_float16")
keras.mixed_precision.set_global_policy("mixed_float16")
if self.no_cuda:
strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
......@@ -216,7 +218,7 @@ class TFTrainingArguments(TrainingArguments):
if tpu:
# Set to bfloat16 in case of TPU
if self.fp16:
tf.keras.mixed_precision.set_global_policy("mixed_bfloat16")
keras.mixed_precision.set_global_policy("mixed_bfloat16")
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
......
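The mixed-precision policy calls change namespace the same way; the policy API itself is unchanged. A short sketch of the device-dependent selection shown above (`fp16` and `on_tpu` are illustrative flags, not the actual attributes):

try:
    import tf_keras as keras
except (ModuleNotFoundError, ImportError):
    from tensorflow import keras

fp16, on_tpu = True, False  # illustrative flags
if fp16:
    # float16 compute on GPU, bfloat16 on TPU; variables stay float32 either way.
    keras.mixed_precision.set_global_policy("mixed_bfloat16" if on_tpu else "mixed_float16")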
......@@ -50,6 +50,7 @@ from ...modeling_tf_utils import (
TFSequenceSummary,
TFTokenClassificationLoss,
get_initializer,
keras,
keras_serializable,
unpack_inputs,
)
......@@ -70,7 +71,7 @@ TF_{{cookiecutter.uppercase_modelname}}_PRETRAINED_MODEL_ARCHIVE_LIST = [
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}Embeddings(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}Embeddings(keras.layers.Layer):
"""Construct the embeddings from word, position and token_type embeddings."""
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
......@@ -81,8 +82,8 @@ class TF{{cookiecutter.camelcase_modelname}}Embeddings(tf.keras.layers.Layer):
self.hidden_size = config.hidden_size
self.max_position_embeddings = config.max_position_embeddings
self.initializer_range = config.initializer_range
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.LayerNorm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
def build(self, input_shape: tf.TensorShape):
with tf.name_scope("word_embeddings"):
......@@ -149,7 +150,7 @@ class TF{{cookiecutter.camelcase_modelname}}Embeddings(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfAttention with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}SelfAttention(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
......@@ -164,16 +165,16 @@ class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer)
self.all_head_size = self.num_attention_heads * self.attention_head_size
self.sqrt_att_head_size = math.sqrt(self.attention_head_size)
self.query = tf.keras.layers.Dense(
self.query = keras.layers.Dense(
units=self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="query"
)
self.key = tf.keras.layers.Dense(
self.key = keras.layers.Dense(
units=self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="key"
)
self.value = tf.keras.layers.Dense(
self.value = keras.layers.Dense(
units=self.all_head_size, kernel_initializer=get_initializer(config.initializer_range), name="value"
)
self.dropout = tf.keras.layers.Dropout(rate=config.attention_probs_dropout_prob)
self.dropout = keras.layers.Dropout(rate=config.attention_probs_dropout_prob)
self.is_decoder = config.is_decoder
......@@ -267,15 +268,15 @@ class TF{{cookiecutter.camelcase_modelname}}SelfAttention(tf.keras.layers.Layer)
# Copied from transformers.models.bert.modeling_tf_bert.TFBertSelfOutput with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}SelfOutput(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}SelfOutput(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
units=config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.LayerNorm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
def call(self, hidden_states: tf.Tensor, input_tensor: tf.Tensor, training: bool = False) -> tf.Tensor:
hidden_states = self.dense(inputs=hidden_states)
......@@ -286,7 +287,7 @@ class TF{{cookiecutter.camelcase_modelname}}SelfOutput(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertAttention with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}Attention(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}Attention(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
......@@ -327,11 +328,11 @@ class TF{{cookiecutter.camelcase_modelname}}Attention(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertIntermediate with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}Intermediate(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}Intermediate(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
units=config.intermediate_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
......@@ -348,15 +349,15 @@ class TF{{cookiecutter.camelcase_modelname}}Intermediate(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertOutput with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}Output(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}Output(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
units=config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.LayerNorm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
def call(self, hidden_states: tf.Tensor, input_tensor: tf.Tensor, training: bool = False) -> tf.Tensor:
hidden_states = self.dense(inputs=hidden_states)
......@@ -367,7 +368,7 @@ class TF{{cookiecutter.camelcase_modelname}}Output(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertLayer with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}Layer(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}Layer(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
......@@ -454,7 +455,7 @@ class TF{{cookiecutter.camelcase_modelname}}Layer(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEncoder with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}Encoder(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
self.config = config
......@@ -524,11 +525,11 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
# Copied from transformers.models.bert.modeling_tf_bert.TFBertPredictionHeadTransform with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}PredictionHeadTransform(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}PredictionHeadTransform(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
units=config.hidden_size,
kernel_initializer=get_initializer(config.initializer_range),
name="dense",
......@@ -539,7 +540,7 @@ class TF{{cookiecutter.camelcase_modelname}}PredictionHeadTransform(tf.keras.lay
else:
self.transform_act_fn = config.hidden_act
self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
self.LayerNorm = keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
def call(self, hidden_states: tf.Tensor) -> tf.Tensor:
hidden_states = self.dense(inputs=hidden_states)
......@@ -550,8 +551,8 @@ class TF{{cookiecutter.camelcase_modelname}}PredictionHeadTransform(tf.keras.lay
# Copied from transformers.models.bert.modeling_tf_bert.TFBertLMPredictionHead with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}LMPredictionHead(tf.keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, input_embeddings: tf.keras.layers.Layer, **kwargs):
class TF{{cookiecutter.camelcase_modelname}}LMPredictionHead(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, input_embeddings: keras.layers.Layer, **kwargs):
super().__init__(**kwargs)
self.vocab_size = config.vocab_size
......@@ -568,7 +569,7 @@ class TF{{cookiecutter.camelcase_modelname}}LMPredictionHead(tf.keras.layers.Lay
super().build(input_shape)
def get_output_embeddings(self) -> tf.keras.layers.Layer:
def get_output_embeddings(self) -> keras.layers.Layer:
return self.input_embeddings
def set_output_embeddings(self, value: tf.Variable):
......@@ -594,8 +595,8 @@ class TF{{cookiecutter.camelcase_modelname}}LMPredictionHead(tf.keras.layers.Lay
# Copied from transformers.models.bert.modeling_tf_bert.TFBertMLMHead with Bert->{{cookiecutter.camelcase_modelname}}
class TF{{cookiecutter.camelcase_modelname}}MLMHead(tf.keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, input_embeddings: tf.keras.layers.Layer, **kwargs):
class TF{{cookiecutter.camelcase_modelname}}MLMHead(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, input_embeddings: keras.layers.Layer, **kwargs):
super().__init__(**kwargs)
self.predictions = TF{{cookiecutter.camelcase_modelname}}LMPredictionHead(config, input_embeddings, name="predictions")
......@@ -607,7 +608,7 @@ class TF{{cookiecutter.camelcase_modelname}}MLMHead(tf.keras.layers.Layer):
@keras_serializable
class TF{{cookiecutter.camelcase_modelname}}MainLayer(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}MainLayer(keras.layers.Layer):
config_class = {{cookiecutter.camelcase_modelname}}Config
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, add_pooling_layer: bool = True, **kwargs):
......@@ -620,7 +621,7 @@ class TF{{cookiecutter.camelcase_modelname}}MainLayer(tf.keras.layers.Layer):
self.encoder = TF{{cookiecutter.camelcase_modelname}}Encoder(config, name="encoder")
# Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.get_input_embeddings
def get_input_embeddings(self) -> tf.keras.layers.Layer:
def get_input_embeddings(self) -> keras.layers.Layer:
return self.embeddings
# Copied from transformers.models.bert.modeling_tf_bert.TFBertMainLayer.set_input_embeddings
......@@ -811,7 +812,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
generic methods the library implements for all its models (such as downloading or saving, resizing the input
embeddings, pruning heads etc.)
This model is also a [tf.keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass.
This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass.
Use it as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matters related to general
usage and behavior.
......@@ -991,7 +992,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForMaskedLM(TF{{cookiecutter.camelca
self.{{cookiecutter.lowercase_modelname}} = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="{{cookiecutter.lowercase_modelname}}")
self.mlm = TF{{cookiecutter.camelcase_modelname}}MLMHead(config, input_embeddings=self.{{cookiecutter.lowercase_modelname}}.embeddings, name="mlm___cls")
def get_lm_head(self) -> tf.keras.layers.Layer:
def get_lm_head(self) -> keras.layers.Layer:
return self.mlm.predictions
@unpack_inputs
......@@ -1064,7 +1065,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForCausalLM(TF{{cookiecutter.camelca
self.{{cookiecutter.lowercase_modelname}} = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="{{cookiecutter.lowercase_modelname}}")
self.mlm = TF{{cookiecutter.camelcase_modelname}}MLMHead(config, input_embeddings=self.{{cookiecutter.lowercase_modelname}}.embeddings, name="mlm___cls")
def get_lm_head(self) -> tf.keras.layers.Layer:
def get_lm_head(self) -> keras.layers.Layer:
return self.mlm.predictions
def prepare_inputs_for_generation(self, inputs, past_key_values=None, attention_mask=None, **model_kwargs):
......@@ -1166,17 +1167,17 @@ class TF{{cookiecutter.camelcase_modelname}}ForCausalLM(TF{{cookiecutter.camelca
class TF{{cookiecutter.camelcase_modelname}}ClassificationHead(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}ClassificationHead(keras.layers.Layer):
"""Head for sentence-level classification tasks."""
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, *inputs, **kwargs):
super().__init__(config, *inputs, **kwargs)
self.dense = tf.keras.layers.Dense(
self.dense = keras.layers.Dense(
units=config.hidden_size, kernel_initializer=get_initializer(config.initializer_range), name="dense"
)
self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.out_proj = tf.keras.layers.Dense(
self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.out_proj = keras.layers.Dense(
units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="out_proj"
)
......@@ -1277,7 +1278,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForMultipleChoice(TF{{cookiecutter.c
self.sequence_summary = TFSequenceSummary(
config, config.initializer_range, name="sequence_summary"
)
self.classifier = tf.keras.layers.Dense(
self.classifier = keras.layers.Dense(
units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)
......@@ -1383,8 +1384,8 @@ class TF{{cookiecutter.camelcase_modelname}}ForTokenClassification(TF{{cookiecut
self.num_labels = config.num_labels
self.{{cookiecutter.lowercase_modelname}} = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="{{cookiecutter.lowercase_modelname}}")
self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.classifier = tf.keras.layers.Dense(
self.dropout = keras.layers.Dropout(rate=config.hidden_dropout_prob)
self.classifier = keras.layers.Dense(
units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
)
......@@ -1456,7 +1457,7 @@ class TF{{cookiecutter.camelcase_modelname}}ForQuestionAnswering(TF{{cookiecutte
self.num_labels = config.num_labels
self.{{cookiecutter.lowercase_modelname}} = TF{{cookiecutter.camelcase_modelname}}MainLayer(config, name="{{cookiecutter.lowercase_modelname}}")
self.qa_outputs = tf.keras.layers.Dense(
self.qa_outputs = keras.layers.Dense(
units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
)
......@@ -1623,7 +1624,7 @@ def _expand_mask(mask: tf.Tensor, tgt_len: Optional[int] = None):
return (one_cst - expanded_mask) * LARGE_NEGATIVE
class TF{{cookiecutter.camelcase_modelname}}LearnedPositionalEmbedding(tf.keras.layers.Embedding):
class TF{{cookiecutter.camelcase_modelname}}LearnedPositionalEmbedding(keras.layers.Embedding):
"""
This module learns positional embeddings up to a fixed maximum size.
"""
......@@ -1639,7 +1640,7 @@ class TF{{cookiecutter.camelcase_modelname}}LearnedPositionalEmbedding(tf.keras.
return super().call(tf.cast(position_ids, dtype=tf.int32))
class TF{{cookiecutter.camelcase_modelname}}Attention(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}Attention(keras.layers.Layer):
"""Multi-headed attention from "Attention Is All You Need"""
def __init__(
......@@ -1655,16 +1656,16 @@ class TF{{cookiecutter.camelcase_modelname}}Attention(tf.keras.layers.Layer):
self.embed_dim = embed_dim
self.num_heads = num_heads
self.dropout = tf.keras.layers.Dropout(dropout)
self.dropout = keras.layers.Dropout(dropout)
self.head_dim = embed_dim // num_heads
assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
self.scaling = self.head_dim ** -0.5
self.is_decoder = is_decoder
self.k_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
self.q_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="q_proj")
self.v_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="v_proj")
self.out_proj = tf.keras.layers.Dense(embed_dim, use_bias=bias, name="out_proj")
self.k_proj = keras.layers.Dense(embed_dim, use_bias=bias, name="k_proj")
self.q_proj = keras.layers.Dense(embed_dim, use_bias=bias, name="q_proj")
self.v_proj = keras.layers.Dense(embed_dim, use_bias=bias, name="v_proj")
self.out_proj = keras.layers.Dense(embed_dim, use_bias=bias, name="out_proj")
def _shape(self, tensor: tf.Tensor, seq_len: int, bsz: int):
return tf.transpose(tf.reshape(tensor, (bsz, seq_len, self.num_heads, self.head_dim)), (0, 2, 1, 3))
......@@ -1776,20 +1777,20 @@ class TF{{cookiecutter.camelcase_modelname}}Attention(tf.keras.layers.Layer):
return attn_output, attn_weights, past_key_value
class TF{{cookiecutter.camelcase_modelname}}EncoderLayer(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}EncoderLayer(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
self.embed_dim = config.d_model
self.self_attn = TF{{cookiecutter.camelcase_modelname}}Attention(
self.embed_dim, config.encoder_attention_heads, dropout=config.attention_dropout, name="self_attn"
)
self.self_attn_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="self_attn_layer_norm")
self.dropout = tf.keras.layers.Dropout(config.dropout)
self.self_attn_layer_norm = keras.layers.LayerNormalization(epsilon=1e-5, name="self_attn_layer_norm")
self.dropout = keras.layers.Dropout(config.dropout)
self.activation_fn = get_tf_activation(config.activation_function)
self.activation_dropout = tf.keras.layers.Dropout(config.activation_dropout)
self.fc1 = tf.keras.layers.Dense(config.encoder_ffn_dim, name="fc1")
self.fc2 = tf.keras.layers.Dense(self.embed_dim, name="fc2")
self.final_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="final_layer_norm")
self.activation_dropout = keras.layers.Dropout(config.activation_dropout)
self.fc1 = keras.layers.Dense(config.encoder_ffn_dim, name="fc1")
self.fc2 = keras.layers.Dense(self.embed_dim, name="fc2")
self.final_layer_norm = keras.layers.LayerNormalization(epsilon=1e-5, name="final_layer_norm")
def call(self, hidden_states: tf.Tensor, attention_mask: tf.Tensor, layer_head_mask: tf.Tensor, training=False):
"""
......@@ -1826,7 +1827,7 @@ class TF{{cookiecutter.camelcase_modelname}}EncoderLayer(tf.keras.layers.Layer):
return hidden_states, self_attn_weights
class TF{{cookiecutter.camelcase_modelname}}DecoderLayer(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}DecoderLayer(keras.layers.Layer):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
self.embed_dim = config.d_model
......@@ -1837,11 +1838,11 @@ class TF{{cookiecutter.camelcase_modelname}}DecoderLayer(tf.keras.layers.Layer):
name="self_attn",
is_decoder=True,
)
self.dropout = tf.keras.layers.Dropout(config.dropout)
self.dropout = keras.layers.Dropout(config.dropout)
self.activation_fn = get_tf_activation(config.activation_function)
self.activation_dropout = tf.keras.layers.Dropout(config.activation_dropout)
self.activation_dropout = keras.layers.Dropout(config.activation_dropout)
self.self_attn_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="self_attn_layer_norm")
self.self_attn_layer_norm = keras.layers.LayerNormalization(epsilon=1e-5, name="self_attn_layer_norm")
self.encoder_attn = TF{{cookiecutter.camelcase_modelname}}Attention(
self.embed_dim,
config.decoder_attention_heads,
......@@ -1849,10 +1850,10 @@ class TF{{cookiecutter.camelcase_modelname}}DecoderLayer(tf.keras.layers.Layer):
name="encoder_attn",
is_decoder=True,
)
self.encoder_attn_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="encoder_attn_layer_norm")
self.fc1 = tf.keras.layers.Dense(config.decoder_ffn_dim, name="fc1")
self.fc2 = tf.keras.layers.Dense(self.embed_dim, name="fc2")
self.final_layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="final_layer_norm")
self.encoder_attn_layer_norm = keras.layers.LayerNormalization(epsilon=1e-5, name="encoder_attn_layer_norm")
self.fc1 = keras.layers.Dense(config.decoder_ffn_dim, name="fc1")
self.fc2 = keras.layers.Dense(self.embed_dim, name="fc2")
self.final_layer_norm = keras.layers.LayerNormalization(epsilon=1e-5, name="final_layer_norm")
def call(
self,
......@@ -1944,7 +1945,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
generic methods the library implements for all its models (such as downloading or saving, resizing the input
embeddings, pruning heads etc.)
This model is also a [tf.keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use
This model is also a [keras.Model](https://www.tensorflow.org/api_docs/python/tf/keras/Model) subclass. Use
it as a regular TF 2.0 Keras Model and refer to the TF 2.0 documentation for all matters related to general usage
and behavior.
......@@ -2062,7 +2063,7 @@ class TF{{cookiecutter.camelcase_modelname}}PreTrainedModel(TFPreTrainedModel):
@keras_serializable
class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}Encoder(keras.layers.Layer):
config_class = {{cookiecutter.camelcase_modelname}}Config
"""
Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a
......@@ -2072,10 +2073,10 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
config: {{cookiecutter.camelcase_modelname}}Config
"""
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, embed_tokens: Optional[tf.keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.dropout = tf.keras.layers.Dropout(config.dropout)
self.dropout = keras.layers.Dropout(config.dropout)
self.layerdrop = config.encoder_layerdrop
self.padding_idx = config.pad_token_id
self.max_source_positions = config.max_position_embeddings
......@@ -2088,7 +2089,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
name="embed_positions",
)
self.layers = [TF{{cookiecutter.camelcase_modelname}}EncoderLayer(config, name=f"layers.{i}") for i in range(config.encoder_layers)]
self.layernorm_embedding = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding")
self.layernorm_embedding = keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding")
def get_embed_tokens(self):
return self.embed_tokens
......@@ -2215,7 +2216,7 @@ class TF{{cookiecutter.camelcase_modelname}}Encoder(tf.keras.layers.Layer):
@keras_serializable
class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}Decoder(keras.layers.Layer):
config_class = {{cookiecutter.camelcase_modelname}}Config
"""
Transformer decoder consisting of *config.decoder_layers* layers. Each layer is a [`TF{{cookiecutter.camelcase_modelname}}DecoderLayer`]
......@@ -2225,7 +2226,7 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
embed_tokens: output embedding
"""
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, embed_tokens: Optional[tf.keras.layers.Embedding] = None, **kwargs):
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, embed_tokens: Optional[keras.layers.Embedding] = None, **kwargs):
super().__init__(**kwargs)
self.config = config
self.padding_idx = config.pad_token_id
......@@ -2238,9 +2239,9 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
)
self.embed_scale = tf.math.sqrt(float(config.d_model)) if config.scale_embedding else 1.0
self.layers = [TF{{cookiecutter.camelcase_modelname}}DecoderLayer(config, name=f"layers.{i}") for i in range(config.decoder_layers)]
self.layernorm_embedding = tf.keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding")
self.layernorm_embedding = keras.layers.LayerNormalization(epsilon=1e-5, name="layernorm_embedding")
self.dropout = tf.keras.layers.Dropout(config.dropout)
self.dropout = keras.layers.Dropout(config.dropout)
def get_embed_tokens(self):
return self.embed_tokens
......@@ -2458,17 +2459,17 @@ class TF{{cookiecutter.camelcase_modelname}}Decoder(tf.keras.layers.Layer):
@keras_serializable
class TF{{cookiecutter.camelcase_modelname}}MainLayer(tf.keras.layers.Layer):
class TF{{cookiecutter.camelcase_modelname}}MainLayer(keras.layers.Layer):
config_class = {{cookiecutter.camelcase_modelname}}Config
def __init__(self, config: {{cookiecutter.camelcase_modelname}}Config, **kwargs):
super().__init__(**kwargs)
self.config = config
self.shared = tf.keras.layers.Embedding(
self.shared = keras.layers.Embedding(
input_dim=config.vocab_size,
output_dim=config.d_model,
embeddings_initializer=tf.keras.initializers.TruncatedNormal(stddev=self.config.init_std),
embeddings_initializer=keras.initializers.TruncatedNormal(stddev=self.config.init_std),
name="model.shared"
)
# Additional attribute to specify the expected name scope of the layer (for loading/storing weights)
......@@ -2637,9 +2638,9 @@ class TF{{cookiecutter.camelcase_modelname}}Model(TF{{cookiecutter.camelcase_mod
# Copied from transformers.models.bart.modeling_tf_bart.BiasLayer
class BiasLayer(tf.keras.layers.Layer):
class BiasLayer(keras.layers.Layer):
"""
Bias as a layer. It is used for serialization purposes: `tf.keras.Model.save_weights` stores on a per-layer basis,
Bias as a layer. It is used for serialization purposes: `keras.Model.save_weights` stores on a per-layer basis,
so all weights have to be registered in a layer.
"""
......@@ -2811,9 +2812,9 @@ class TF{{cookiecutter.camelcase_modelname}}ForConditionalGeneration(TF{{cookiec
def hf_compute_loss(self, labels, logits):
"""CrossEntropyLoss that ignores pad tokens"""
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
loss_fn = keras.losses.SparseCategoricalCrossentropy(
from_logits=True,
reduction=tf.keras.losses.Reduction.NONE,
reduction=keras.losses.Reduction.NONE,
)
melted_labels = tf.reshape(labels, (-1,))
active_loss = tf.not_equal(melted_labels, self.config.pad_token_id)
......
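The `hf_compute_loss` body above follows a mask-then-reduce pattern; a standalone sketch of the same idea (the function name and the final mean reduction are assumptions, not the template's exact return):

import tensorflow as tf

try:
    import tf_keras as keras
except (ModuleNotFoundError, ImportError):
    from tensorflow import keras

def masked_sparse_ce(labels: tf.Tensor, logits: tf.Tensor, pad_token_id: int) -> tf.Tensor:
    # Per-token cross-entropy, with pad positions dropped before reduction.
    loss_fn = keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction=keras.losses.Reduction.NONE
    )
    flat_labels = tf.reshape(labels, (-1,))
    active = tf.not_equal(flat_labels, pad_token_id)
    flat_logits = tf.reshape(logits, (-1, tf.shape(logits)[-1]))
    per_token = loss_fn(tf.boolean_mask(flat_labels, active), tf.boolean_mask(flat_logits, active))
    return tf.reduce_mean(per_token)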
......@@ -43,6 +43,7 @@ if is_tf_available():
TFMinLengthLogitsProcessor,
tf_top_k_top_p_filtering,
)
from transformers.modeling_tf_utils import keras
if is_tensorflow_text_available():
import tensorflow_text as text
......@@ -254,7 +255,7 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTests
# file needed to load the TF tokenizer
hf_hub_download(repo_id="google/flan-t5-small", filename="spiece.model", local_dir=tmp_dir)
class CompleteSentenceTransformer(tf.keras.layers.Layer):
class CompleteSentenceTransformer(keras.layers.Layer):
def __init__(self):
super().__init__()
self.tokenizer = text.SentencepieceTokenizer(
......@@ -271,9 +272,9 @@ class TFGenerationIntegrationTests(unittest.TestCase, GenerationIntegrationTests
return self.tokenizer.detokenize(outputs)
complete_model = CompleteSentenceTransformer()
inputs = tf.keras.layers.Input(shape=(1,), dtype=tf.string, name="inputs")
inputs = keras.layers.Input(shape=(1,), dtype=tf.string, name="inputs")
outputs = complete_model(inputs)
keras_model = tf.keras.Model(inputs, outputs)
keras_model = keras.Model(inputs, outputs)
keras_model.save(tmp_dir)
def test_eos_token_id_int_and_list_top_k_top_sampling(self):
......
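The generation test above wraps a text-in/text-out layer in a functional Keras model and exports it. The skeleton of that pattern, with a trivial string layer standing in for the test's `CompleteSentenceTransformer`:

import tensorflow as tf

try:
    import tf_keras as keras
except (ModuleNotFoundError, ImportError):
    from tensorflow import keras

upper = keras.layers.Lambda(lambda x: tf.strings.upper(x))  # stand-in layer
inputs = keras.layers.Input(shape=(1,), dtype=tf.string, name="inputs")
keras_model = keras.Model(inputs, upper(inputs))
keras_model.save("exported_model")  # SavedModel directory under Keras 2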
......@@ -10,6 +10,8 @@ from transformers.testing_utils import require_tensorflow_text, require_tf, slow
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_utils import keras
if is_tensorflow_text_available():
from transformers.models.bert import TFBertTokenizer
......@@ -18,8 +20,9 @@ TOKENIZER_CHECKPOINTS = ["bert-base-uncased", "bert-base-cased"]
TINY_MODEL_CHECKPOINT = "hf-internal-testing/tiny-bert-tf-only"
if is_tf_available():
from transformers.modeling_tf_utils import keras
class ModelToSave(tf.keras.Model):
class ModelToSave(keras.Model):
def __init__(self, tokenizer):
super().__init__()
self.tokenizer = tokenizer
......
......@@ -44,6 +44,7 @@ if is_tf_available():
TFBlipTextModel,
TFBlipVisionModel,
)
from transformers.modeling_tf_utils import keras
from transformers.models.blip.modeling_tf_blip import TF_BLIP_PRETRAINED_MODEL_ARCHIVE_LIST
......@@ -172,9 +173,9 @@ class TFBlipVisionModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_model(self):
config_and_inputs = self.model_tester.prepare_config_and_inputs()
......
......@@ -38,6 +38,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TFCLIPModel, TFCLIPTextModel, TFCLIPVisionModel, TFSharedEmbeddings
from transformers.modeling_tf_utils import keras
from transformers.models.clip.modeling_tf_clip import TF_CLIP_PRETRAINED_MODEL_ARCHIVE_LIST
......@@ -151,9 +152,9 @@ class TFCLIPVisionModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
......@@ -283,7 +284,7 @@ class TFCLIPVisionModelTest(TFModelTesterMixin, unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
output_hidden_states = outputs["hidden_states"]
output_attentions = outputs["attentions"]
......@@ -443,7 +444,7 @@ class TFCLIPTextModelTest(TFModelTesterMixin, unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
output_hidden_states = outputs["hidden_states"]
output_attentions = outputs["attentions"]
......@@ -565,7 +566,7 @@ class TFCLIPModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
and module_member_name[: -len("MainLayer")] == model_class.__name__[: -len("Model")]
for module_member in (getattr(module, module_member_name),)
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
}
for main_layer_class in tf_main_layer_classes:
......@@ -579,17 +580,17 @@ class TFCLIPModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
main_layer = main_layer_class(config)
symbolic_inputs = {
name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
name: keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
}
model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
model = keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
outputs = model(inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
filepath = os.path.join(tmpdirname, "keras_model.h5")
model.save(filepath)
if "T5" in main_layer_class.__name__:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath,
custom_objects={
main_layer_class.__name__: main_layer_class,
......@@ -597,10 +598,10 @@ class TFCLIPModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
},
)
else:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
)
assert isinstance(model, tf.keras.Model)
assert isinstance(model, keras.Model)
after_outputs = model(inputs_dict)
self.assert_outputs_same(after_outputs, outputs)
......
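The CLIP main-layer test round-trips each `@keras_serializable` layer through an H5 file, handing the class back via `custom_objects` at load time. A self-contained miniature of that round trip (`ToyMainLayer` is illustrative):

import tensorflow as tf

try:
    import tf_keras as keras
except (ModuleNotFoundError, ImportError):
    from tensorflow import keras

class ToyMainLayer(keras.layers.Layer):
    # Stand-in for a serializable main layer with no constructor arguments.
    def call(self, x):
        return x * 2.0

inputs = keras.Input(shape=(4,), dtype=tf.float32)
model = keras.Model(inputs, ToyMainLayer(name="toy")(inputs))
model.save("toy_model.h5")
restored = keras.models.load_model("toy_model.h5", custom_objects={"ToyMainLayer": ToyMainLayer})
assert isinstance(restored, keras.Model)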
......@@ -37,6 +37,7 @@ if is_tf_available():
TFConvBertForTokenClassification,
TFConvBertModel,
)
from transformers.modeling_tf_utils import keras
class TFConvBertModelTester:
......@@ -306,7 +307,7 @@ class TFConvBertModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
if self.is_encoder_decoder:
......
......@@ -29,6 +29,7 @@ from ...test_pipeline_mixin import PipelineTesterMixin
if is_tf_available():
import tensorflow as tf
from transformers.modeling_tf_utils import keras
from transformers.models.ctrl.modeling_tf_ctrl import (
TF_CTRL_PRETRAINED_MODEL_ARCHIVE_LIST,
TFCTRLForSequenceClassification,
......@@ -226,18 +227,18 @@ class TFCTRLModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
for model_class in self.all_model_classes:
model = model_class(config)
model.build_in_name_scope() # may be needed for the get_bias() call below
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
assert isinstance(model.get_input_embeddings(), keras.layers.Layer)
if model_class in list_lm_models:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
assert isinstance(x, keras.layers.Layer)
name = model.get_bias()
assert isinstance(name, dict)
for k, v in name.items():
assert isinstance(v, tf.Variable)
elif model_class in list_other_models_with_output_ebd:
x = model.get_output_embeddings()
assert isinstance(x, tf.keras.layers.Layer)
assert isinstance(x, keras.layers.Layer)
name = model.get_bias()
assert name is None
else:
......
......@@ -22,6 +22,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TFCvtForImageClassification, TFCvtModel
from transformers.modeling_tf_utils import keras
from transformers.models.cvt.modeling_tf_cvt import TF_CVT_PRETRAINED_MODEL_ARCHIVE_LIST
......@@ -191,10 +192,10 @@ class TFCvtModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
@unittest.skip(reason="Get `Failed to determine best cudnn convolution algo.` error after using TF 2.12+cuda 11.8")
def test_keras_fit_mixed_precision(self):
policy = tf.keras.mixed_precision.Policy("mixed_float16")
tf.keras.mixed_precision.set_global_policy(policy)
policy = keras.mixed_precision.Policy("mixed_float16")
keras.mixed_precision.set_global_policy(policy)
super().test_keras_fit()
tf.keras.mixed_precision.set_global_policy("float32")
keras.mixed_precision.set_global_policy("float32")
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
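The test above toggles Keras's process-wide precision policy and then restores it, matching the commit's goal of reverting to float32 after each test. A sketch of the same toggle with the restore made unconditional via try/finally (a small hardening, not the test's literal structure):

from transformers.modeling_tf_utils import keras

policy = keras.mixed_precision.Policy("mixed_float16")
keras.mixed_precision.set_global_policy(policy)
try:
    ...  # build and fit a model here; new layers compute in float16, keep weights in float32
finally:
    keras.mixed_precision.set_global_policy("float32")  # restore so later code sees float32 again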
@@ -39,6 +39,7 @@ if is_tf_available():
TFData2VecVisionForSemanticSegmentation,
TFData2VecVisionModel,
)
from transformers.modeling_tf_utils import keras
from transformers.models.data2vec.modeling_tf_data2vec_vision import (
TF_DATA2VEC_VISION_PRETRAINED_MODEL_ARCHIVE_LIST,
)
@@ -216,9 +217,9 @@ class TFData2VecVisionModelTest(TFModelTesterMixin, PipelineTesterMixin, unittes
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -365,7 +366,7 @@ class TFData2VecVisionModelTest(TFModelTesterMixin, PipelineTesterMixin, unittes
key: val for key, val in prepared_for_class.items() if key not in label_names
}
self.assertGreater(len(inputs_minus_labels), 0)
model.compile(optimizer=tf.keras.optimizers.SGD(0.0), run_eagerly=True)
model.compile(optimizer=keras.optimizers.SGD(0.0), run_eagerly=True)
# Make sure the model fits without crashing regardless of where we pass the labels
history1 = model.fit(
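The surrounding test checks that `fit()` works whether labels ride inside the input dict or are passed separately as `y`. A self-contained sketch of the same idea on a tiny random model — BERT here is only a convenient stand-in for the looped model classes:

import tensorflow as tf
from transformers import BertConfig, TFBertForSequenceClassification
from transformers.modeling_tf_utils import keras

config = BertConfig(hidden_size=32, num_hidden_layers=2, num_attention_heads=2, intermediate_size=37)
model = TFBertForSequenceClassification(config)
model.compile(optimizer=keras.optimizers.SGD(0.0), run_eagerly=True)  # zero LR: weights never move

input_ids = tf.random.uniform((4, 8), maxval=config.vocab_size, dtype=tf.int32)
labels = tf.zeros((4,), dtype=tf.int32)

model.fit({"input_ids": input_ids, "labels": labels}, epochs=1, verbose=0)  # labels inside the dict
model.fit({"input_ids": input_ids}, labels, epochs=1, verbose=0)            # labels passed as y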
@@ -40,6 +40,7 @@ if is_tf_available():
TFDeiTForMaskedImageModeling,
TFDeiTModel,
)
from transformers.modeling_tf_utils import keras
from transformers.models.deit.modeling_tf_deit import TF_DEIT_PRETRAINED_MODEL_ARCHIVE_LIST
@@ -211,9 +212,9 @@ class TFDeiTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Dense))
self.assertTrue(x is None or isinstance(x, keras.layers.Dense))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -37,6 +37,7 @@ if is_tf_available():
TFEfficientFormerForImageClassificationWithTeacher,
TFEfficientFormerModel,
)
from transformers.modeling_tf_utils import keras
from transformers.models.efficientformer.modeling_tf_efficientformer import (
TF_EFFICIENTFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,
)
@@ -355,7 +356,7 @@ class TFEfficientFormerModelTest(TFModelTesterMixin, PipelineTesterMixin, unitte
# These are maximally general inputs for the model, with multiple None dimensions
# Hopefully this will catch any conditionals that fail for flexible shapes
functional_inputs = {
key: tf.keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)
key: keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)
for key, val in model.input_signature.items()
if key in model.dummy_inputs
}
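To unpack the comment above: building symbolic `keras.Input`s whose leading dimensions are `None` forces every Python-level shape conditional in `call()` to run against unknown shapes, so brittle code fails at trace time rather than on an unlucky input. A sketch of the probe outside the test harness (default-config instantiation is an assumption):

from transformers import EfficientFormerConfig, TFEfficientFormerModel
from transformers.modeling_tf_utils import keras

model = TFEfficientFormerModel(EfficientFormerConfig())
functional_inputs = {
    key: keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)
    for key, val in model.input_signature.items()
    if key in model.dummy_inputs
}
outputs = model(functional_inputs)  # symbolic trace; shape-dependent Python branches would raise here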
@@ -509,7 +509,7 @@ class TFEncoderDecoderMixin:
tf_outputs = tf_model(tf_inputs_dict)
# tf models returned loss is usually a tensor rather than a scalar.
# (see `hf_compute_loss`: it uses `tf.keras.losses.Reduction.NONE`)
# (see `hf_compute_loss`: it uses `keras.losses.Reduction.NONE`)
# Change it here to a scalar to match PyTorch models' loss
tf_loss = getattr(tf_outputs, "loss", None)
if tf_loss is not None:
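The comment above is the crux: with `Reduction.NONE` the Keras loss comes back with one entry per sample, and it must be collapsed to a scalar before comparing with PyTorch. A tiny self-contained illustration — averaging is one reasonable reduction; the mixin's exact choice is not shown in this hunk:

import tensorflow as tf
from transformers.modeling_tf_utils import keras

loss_fn = keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction=keras.losses.Reduction.NONE
)
per_sample = loss_fn(tf.constant([0, 1]), tf.random.normal((2, 3)))  # shape (2,): one loss per sample
scalar = tf.reduce_mean(per_sample)                                  # shape (): comparable to a torch loss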
@@ -30,6 +30,7 @@ if is_tf_available():
import numpy
import tensorflow as tf
from transformers.modeling_tf_utils import keras
from transformers.models.esm.modeling_tf_esm import (
TF_ESM_PRETRAINED_MODEL_ARCHIVE_LIST,
TFEsmForMaskedLM,
@@ -269,7 +270,7 @@ class TFEsmModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
for model_class in self.all_model_classes:
model = model_class(config)
assert isinstance(model.get_input_embeddings(), tf.keras.layers.Layer)
assert isinstance(model.get_input_embeddings(), keras.layers.Layer)
if model_class is TFEsmForMaskedLM:
# Output embedding test differs from the main test because they're a matrix, not a layer
name = model.get_bias()
@@ -10,6 +10,7 @@ from transformers.testing_utils import require_keras_nlp, require_tf, slow
if is_tf_available():
import tensorflow as tf
if is_keras_nlp_available():
from transformers.models.gpt2 import TFGPT2Tokenizer
@@ -46,6 +46,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import TFGroupViTModel, TFGroupViTTextModel, TFGroupViTVisionModel, TFSharedEmbeddings
from transformers.modeling_tf_utils import keras
from transformers.models.groupvit.modeling_tf_groupvit import TF_GROUPVIT_PRETRAINED_MODEL_ARCHIVE_LIST
@@ -186,9 +187,9 @@ class TFGroupViTVisionModelTest(TFModelTesterMixin, unittest.TestCase):
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Layer))
self.assertTrue(x is None or isinstance(x, keras.layers.Layer))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()
@@ -340,7 +341,7 @@ class TFGroupViTVisionModelTest(TFModelTesterMixin, unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
output_hidden_states = outputs["hidden_states"]
output_attentions = outputs["attentions"]
@@ -505,7 +506,7 @@ class TFGroupViTTextModelTest(TFModelTesterMixin, unittest.TestCase):
with tempfile.TemporaryDirectory() as tmpdirname:
model.save_pretrained(tmpdirname, saved_model=True)
saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
model = tf.keras.models.load_model(saved_model_dir)
model = keras.models.load_model(saved_model_dir)
outputs = model(class_inputs_dict)
output_hidden_states = outputs["hidden_states"]
output_attentions = outputs["attentions"]
@@ -655,7 +656,7 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
and module_member_name[: -len("MainLayer")] == model_class.__name__[: -len("Model")]
for module_member in (getattr(module, module_member_name),)
if isinstance(module_member, type)
and tf.keras.layers.Layer in module_member.__bases__
and keras.layers.Layer in module_member.__bases__
and getattr(module_member, "_keras_serializable", False)
}
for main_layer_class in tf_main_layer_classes:
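The set comprehension feeding this loop packs a fair amount of introspection into one expression. Unrolled for readability, using `TFGroupViTModel` as a stand-in for the test's `model_class` loop variable (the reconstruction of the collapsed conditions is an assumption based on the visible lines):

import importlib

from transformers import TFGroupViTModel
from transformers.modeling_tf_utils import keras

model_class = TFGroupViTModel
module = importlib.import_module(model_class.__module__)

tf_main_layer_classes = set()
for member_name in dir(module):
    member = getattr(module, member_name)
    if (
        member_name.endswith("MainLayer")
        # pair e.g. TFGroupViTMainLayer with TFGroupViTModel
        and member_name[: -len("MainLayer")] == model_class.__name__[: -len("Model")]
        and isinstance(member, type)
        and keras.layers.Layer in member.__bases__         # direct keras.layers.Layer subclass
        and getattr(member, "_keras_serializable", False)  # marked by @keras_serializable
    ):
        tf_main_layer_classes.add(member)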
@@ -669,17 +670,17 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
main_layer = main_layer_class(config)
symbolic_inputs = {
name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
name: keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
}
model = tf.keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
model = keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
outputs = model(inputs_dict)
with tempfile.TemporaryDirectory() as tmpdirname:
filepath = os.path.join(tmpdirname, "keras_model.h5")
model.save(filepath)
if "T5" in main_layer_class.__name__:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath,
custom_objects={
main_layer_class.__name__: main_layer_class,
@@ -687,10 +688,10 @@ class TFGroupViTModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.Test
},
)
else:
model = tf.keras.models.load_model(
model = keras.models.load_model(
filepath, custom_objects={main_layer_class.__name__: main_layer_class}
)
assert isinstance(model, tf.keras.Model)
assert isinstance(model, keras.Model)
after_outputs = model(inputs_dict)
self.assert_outputs_same(after_outputs, outputs)
@@ -36,6 +36,7 @@ if is_tf_available():
import tensorflow as tf
from transformers import SamProcessor, TFSamModel
from transformers.modeling_tf_utils import keras
if is_vision_available():
from PIL import Image
@@ -322,9 +323,9 @@ class TFSamModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase)
for model_class in self.all_model_classes:
model = model_class(config)
self.assertIsInstance(model.get_input_embeddings(), (tf.keras.layers.Layer))
self.assertIsInstance(model.get_input_embeddings(), (keras.layers.Layer))
x = model.get_output_embeddings()
self.assertTrue(x is None or isinstance(x, tf.keras.layers.Dense))
self.assertTrue(x is None or isinstance(x, keras.layers.Dense))
def test_forward_signature(self):
config, _ = self.model_tester.prepare_config_and_inputs_for_common()