ModelZoo / ResNet50_tensorflow · Commit 471aebdb

Authored May 20, 2021 by Rami Al-Rfou; committed by A. Unique TensorFlower, May 20, 2021.

Internal change

PiperOrigin-RevId: 375007439
Parent: 0abf6420
Showing 2 changed files with 7 additions and 44 deletions (+7 / -44):

  official/nlp/keras_nlp/layers/position_embedding.py       +7 / -13
  official/nlp/keras_nlp/layers/position_embedding_test.py  +0 / -31
official/nlp/keras_nlp/layers/position_embedding.py

@@ -33,7 +33,6 @@ class PositionEmbedding(tf.keras.layers.Layer):
     max_length: The maximum size of the dynamic sequence.
     initializer: The initializer to use for the embedding weights. Defaults to
       "glorot_uniform".
-    seq_axis: The axis of the input tensor where we add the embeddings.
 
   Reference: This layer creates a positional embedding as described in
   [BERT: Pre-training of Deep Bidirectional Transformers for Language
@@ -43,7 +42,6 @@ class PositionEmbedding(tf.keras.layers.Layer):
 
   def __init__(self,
                max_length,
                initializer="glorot_uniform",
-               seq_axis=1,
                **kwargs):
     super(PositionEmbedding, self).__init__(**kwargs)
@@ -53,13 +51,11 @@ class PositionEmbedding(tf.keras.layers.Layer):
       )
     self._max_length = max_length
     self._initializer = tf.keras.initializers.get(initializer)
-    self._seq_axis = seq_axis
 
   def get_config(self):
     config = {
         "max_length": self._max_length,
         "initializer": tf.keras.initializers.serialize(self._initializer),
-        "seq_axis": self._seq_axis,
     }
     base_config = super(PositionEmbedding, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
@@ -67,17 +63,18 @@ class PositionEmbedding(tf.keras.layers.Layer):
 
   def build(self, input_shape):
     dimension_list = input_shape.as_list()
-    seq_length = dimension_list[self._seq_axis]
-    width = dimension_list[-1]
+    if len(dimension_list) != 3:
+      raise ValueError("PositionEmbedding expects a 3-dimensional input tensor "
+                       "of shape [batch, sequence, width], got "
+                       "{}".format(input_shape))
+    seq_length = dimension_list[1]
+    width = dimension_list[2]
 
     if self._max_length is not None:
       weight_sequence_length = self._max_length
     else:
       weight_sequence_length = seq_length
 
-    self._expanded_shape = [1 for d in dimension_list]
-    self._expanded_shape[self._seq_axis] = weight_sequence_length
-    self._expanded_shape[-1] = width
     self._position_embeddings = self.add_weight(
         "embeddings",
         shape=[weight_sequence_length, width],
@@ -87,8 +84,5 @@ class PositionEmbedding(tf.keras.layers.Layer):
 
   def call(self, inputs):
     input_shape = tf.shape(inputs)
-    actual_seq_len = input_shape[self._seq_axis]
-    position_embeddings = self._position_embeddings[:actual_seq_len, :]
-    self._expanded_shape[self._seq_axis] = actual_seq_len
-    position_embeddings = tf.reshape(position_embeddings, self._expanded_shape)
+    position_embeddings = self._position_embeddings[:input_shape[1], :]
     return tf.broadcast_to(position_embeddings, input_shape)
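
For context, a minimal usage sketch of the layer under the post-change contract (illustrative only, not part of this commit): inputs must be rank-3 [batch, sequence, width], the sequence axis is fixed at axis 1, and the embedding table is sliced to the runtime sequence length and broadcast over the batch. The import path mirrors the file above and assumes the official package is importable.

# Illustrative sketch; shapes follow the tests this commit keeps.
import tensorflow as tf

from official.nlp.keras_nlp.layers import position_embedding

width = 30
test_layer = position_embedding.PositionEmbedding(max_length=40)

# Static sequence length: the layer builds a [max_length, width] table,
# slices it to the input's sequence length, and broadcasts over the batch.
static_input = tf.keras.Input(shape=(21, width))
static_output = test_layer(static_input)    # static shape: (None, 21, 30)

# Dynamic sequence length: slicing happens at call time via tf.shape(inputs)[1],
# so the sequence dimension may be None when the graph is built.
dynamic_input = tf.keras.Input(shape=(None, width))
dynamic_output = test_layer(dynamic_input)  # static shape: (None, None, 30)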
official/nlp/keras_nlp/layers/position_embedding_test.py
@@ -42,22 +42,6 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
     # The default output dtype for this layer should be tf.float32.
     self.assertEqual(tf.float32, output_tensor.dtype)
 
-  def test_non_default_axis_static(self):
-    # Create a 3-dimensional input (the first dimension is implicit).
-    sequence_length = 21
-    test_layer = position_embedding.PositionEmbedding(
-        max_length=sequence_length, seq_axis=2)
-    width = 30
-    input_tensor = tf.keras.Input(shape=(sequence_length, width, width))
-    output_tensor = test_layer(input_tensor)
-
-    # When using static positional embedding shapes, the output is expected
-    # to be the same as the input shape in all dimensions save batch.
-    expected_output_shape = [None, sequence_length, width, width]
-    self.assertEqual(expected_output_shape, output_tensor.shape.as_list())
-    # The default output dtype for this layer should be tf.float32.
-    self.assertEqual(tf.float32, output_tensor.dtype)
-
   def test_float16_dtype(self):
     # Create a 3-dimensional input (the first dimension is implicit).
     sequence_length = 21
@@ -89,21 +73,6 @@ class PositionEmbeddingLayerTest(keras_parameterized.TestCase):
     expected_output_shape = [None, None, width]
     self.assertEqual(expected_output_shape, output_tensor.shape.as_list())
 
-  def test_non_default_axis_dynamic(self):
-    max_sequence_length = 60
-    test_layer = position_embedding.PositionEmbedding(
-        max_length=max_sequence_length, seq_axis=2)
-    # Create a 3-dimensional input (the first dimension is implicit).
-    width = 30
-    input_tensor = tf.keras.Input(shape=(None, None, width))
-    output_tensor = test_layer(input_tensor)
-
-    # When using dynamic positional embedding shapes, the output is expected
-    # to be the same as the input shape in all dimensions - but may be None if
-    # the input shape is None there.
-    expected_output_shape = [None, None, None, width]
-    self.assertEqual(expected_output_shape, output_tensor.shape.as_list())
-
   def test_dynamic_layer_slicing(self):
     max_sequence_length = 40
     test_layer = position_embedding.PositionEmbedding(
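
The two deleted tests were the only coverage for seq_axis=2 on rank-4 inputs. A hedged workaround sketch (my assumption, not something this commit provides): with seq_axis gone, a rank-4 tensor can still get position embeddings along its third axis by folding the extra leading dimension into the batch before calling the layer and unfolding afterwards. The helper name below is hypothetical.

# Illustrative workaround, not part of this change: reproduce the removed
# seq_axis=2 behaviour for a [batch, d1, seq, width] tensor by reshaping to
# the layer's fixed [batch, sequence, width] layout and back.
import tensorflow as tf

from official.nlp.keras_nlp.layers import position_embedding


def position_embeddings_on_axis2(inputs, layer):
  """Returns position embeddings that vary along axis 2 of a rank-4 tensor."""
  batch = tf.shape(inputs)[0]
  d1 = tf.shape(inputs)[1]
  seq = tf.shape(inputs)[2]
  width = inputs.shape[-1]  # must be statically known to build the weight
  folded = tf.reshape(inputs, [batch * d1, seq, width])   # rank-3 view
  embeddings = layer(folded)                              # [batch * d1, seq, width]
  return tf.reshape(embeddings, [batch, d1, seq, width])


layer = position_embedding.PositionEmbedding(max_length=60)
x = tf.zeros([2, 4, 21, 30])
pos = position_embeddings_on_axis2(x, layer)  # shape: (2, 4, 21, 30)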