"docs/vscode:/vscode.git/clone" did not exist on "62c14d2f522b28c17f2f5c2aa9ce453fb1b68e33"
Commit b1157cf4 authored by Scott Zhu, committed by A. Unique TensorFlower

Prepare for upcoming keras initializer change.

PiperOrigin-RevId: 451481056
parent a5d7a452
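
In short, every place that handed the shared `initializer` object directly to a sub-layer now passes `tf_utils.clone_initializer(initializer)` instead, so each layer owns an independent initializer instance once Keras initializers become stateful. As a rough illustration only (not necessarily the exact `tf_utils.clone_initializer` implementation), such a clone helper can look like this:

import tensorflow as tf

def clone_initializer(initializer):
  # Configured Initializer instances are re-created from their config so each
  # layer receives an independent copy; strings and plain callables are
  # returned unchanged.
  if isinstance(initializer, tf.keras.initializers.Initializer):
    return initializer.__class__.from_config(initializer.get_config())
  return initializer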
@@ -19,6 +19,7 @@ import collections
 from absl import logging
 import tensorflow as tf
+from official.modeling import tf_utils
 from official.nlp.modeling import layers
 from official.projects.roformer import roformer_encoder_block
@@ -115,7 +116,7 @@ class RoformerEncoder(tf.keras.Model):
       embedding_layer_inst = layers.on_device_embedding.OnDeviceEmbedding(
           vocab_size=vocab_size,
           embedding_width=embedding_width,
-          initializer=initializer,
+          initializer=tf_utils.clone_initializer(initializer),
           name='word_embeddings')
     else:
       embedding_layer_inst = embedding_layer
@@ -125,7 +126,7 @@ class RoformerEncoder(tf.keras.Model):
     type_embedding_layer = layers.on_device_embedding.OnDeviceEmbedding(
         vocab_size=type_vocab_size,
         embedding_width=embedding_width,
-        initializer=initializer,
+        initializer=tf_utils.clone_initializer(initializer),
         use_one_hot=True,
         name='type_embeddings')
     type_embeddings = type_embedding_layer(type_ids)
@@ -146,7 +147,7 @@ class RoformerEncoder(tf.keras.Model):
           '...x,xy->...y',
           output_shape=hidden_size,
           bias_axes='y',
-          kernel_initializer=initializer,
+          kernel_initializer=tf_utils.clone_initializer(initializer),
           name='embedding_projection')
       embeddings = embedding_projection(embeddings)
     else:
@@ -171,7 +172,7 @@ class RoformerEncoder(tf.keras.Model):
          attention_dropout=attention_dropout,
          norm_first=norm_first,
          output_range=transformer_output_range,
-          kernel_initializer=initializer,
+          kernel_initializer=tf_utils.clone_initializer(initializer),
          name='roformer/layer_%d' % i)
      transformer_layers.append(layer)
      data = layer([data, attention_mask])
@@ -185,7 +186,7 @@ class RoformerEncoder(tf.keras.Model):
     pooler_layer = tf.keras.layers.Dense(
         units=hidden_size,
         activation='tanh',
-        kernel_initializer=initializer,
+        kernel_initializer=tf_utils.clone_initializer(initializer),
         name='pooler_transform')
     cls_output = pooler_layer(first_token_tensor)
@@ -15,6 +15,7 @@
 """Roformer TransformerEncoder block layer."""
 import tensorflow as tf
+from official.modeling import tf_utils
 from official.projects.roformer import roformer_attention
@@ -111,7 +112,8 @@ class RoformerEncoderBlock(tf.keras.layers.Layer):
       self._attention_initializer = tf.keras.initializers.get(
           attention_initializer)
     else:
-      self._attention_initializer = self._kernel_initializer
+      self._attention_initializer = tf_utils.clone_initializer(
+          self._kernel_initializer)
     self._attention_axes = attention_axes

   def build(self, input_shape):
@@ -164,7 +166,7 @@ class RoformerEncoderBlock(tf.keras.layers.Layer):
         einsum_equation,
         output_shape=(None, self._inner_dim),
         bias_axes="d",
-        kernel_initializer=self._kernel_initializer,
+        kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
         name="intermediate",
         **common_kwargs)
     policy = tf.keras.mixed_precision.global_policy()
@@ -182,7 +184,7 @@ class RoformerEncoderBlock(tf.keras.layers.Layer):
         output_shape=(None, hidden_size),
         bias_axes="d",
         name="output",
-        kernel_initializer=self._kernel_initializer,
+        kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
         **common_kwargs)
     self._output_dropout = tf.keras.layers.Dropout(rate=self._output_dropout)
     # Use float32 in layernorm for numeric stability.
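
A sketch of the usage pattern the diff applies throughout; the layer sizes and initializer arguments here are arbitrary examples, not values from the repository:

import tensorflow as tf
from official.modeling import tf_utils

init = tf.keras.initializers.TruncatedNormal(stddev=0.02)

# Before: both layers share the single `init` instance, which the upcoming
# stateful Keras initializer behavior may not handle well.
shared_a = tf.keras.layers.Dense(8, kernel_initializer=init)
shared_b = tf.keras.layers.Dense(8, kernel_initializer=init)

# After: each layer receives its own freshly cloned initializer instance.
cloned_a = tf.keras.layers.Dense(
    8, kernel_initializer=tf_utils.clone_initializer(init))
cloned_b = tf.keras.layers.Dense(
    8, kernel_initializer=tf_utils.clone_initializer(init))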