Unverified Commit 09d9656f authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

parents ac671306 49a5706c
@@ -76,8 +76,8 @@ class EmbeddingSharedWeights(tf.keras.layers.Layer):
     with tf.name_scope("embedding"):
       # Create binary mask of size [batch_size, length]
       embeddings = tf.gather(self.shared_weights, inputs)
-      mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype)
-      embeddings *= tf.expand_dims(mask, -1)
+      # mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype)
+      # embeddings *= tf.expand_dims(mask, -1)
       # Scale embedding by the sqrt of the hidden size
       embeddings *= self.hidden_size**0.5
...
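The two lines commented out above zeroed out the embedding vectors at padded positions (token id 0). Below is a minimal standalone sketch, with made-up toy shapes rather than anything from this diff, of what that mask computed and what changes once it is disabled:

import tensorflow as tf

# Toy stand-ins for self.shared_weights and the integer input ids.
shared_weights = tf.random.normal([8, 4])        # [vocab_size, hidden_size]
inputs = tf.constant([[5, 3, 0, 0]])             # id 0 marks padding

embeddings = tf.gather(shared_weights, inputs)   # [1, 4, 4]

# What the disabled lines did: zero the vectors at padded positions.
mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype)
masked = embeddings * tf.expand_dims(mask, -1)

# With the mask commented out, padded positions keep row 0 of the embedding
# table (scaled like every other position) instead of an all-zeros vector.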
@@ -19,7 +19,7 @@
 from absl import flags
 import tensorflow as tf

-from official.nlp.transformer import model_params
+from official.legacy.transformer import model_params
 from official.utils.flags import core as flags_core
 from official.utils.misc import keras_utils
...
@@ -16,7 +16,7 @@
 import tensorflow as tf

-from official.nlp.transformer import model_utils
+from official.legacy.transformer import model_utils

 NEG_INF = -1e9
...
@@ -19,14 +19,15 @@ Transformer model code source: https://github.com/tensorflow/tensor2tensor
 """
 import tensorflow as tf

+from official.legacy.transformer import attention_layer
+from official.legacy.transformer import embedding_layer
+from official.legacy.transformer import ffn_layer
+from official.legacy.transformer import metrics
+from official.legacy.transformer import model_utils
+from official.legacy.transformer.utils.tokenizer import EOS_ID
 from official.nlp.modeling.layers import position_embedding
 from official.nlp.modeling.ops import beam_search
-from official.nlp.transformer import attention_layer
-from official.nlp.transformer import embedding_layer
-from official.nlp.transformer import ffn_layer
-from official.nlp.transformer import metrics
-from official.nlp.transformer import model_utils
-from official.nlp.transformer.utils.tokenizer import EOS_ID

 # Disable the not-callable lint error, since it claims many objects are not
 # callable when they actually are.
@@ -196,13 +197,12 @@ class Transformer(tf.keras.Model):
     with tf.name_scope("decode"):
       # Prepare inputs to decoder layers by shifting targets, adding positional
       # encoding and applying dropout.
+      with tf.name_scope("shift_targets"):
+        # Shift targets to the right, and remove the last element
+        targets = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]
       decoder_inputs = self.embedding_softmax_layer(targets)
       decoder_inputs = tf.cast(decoder_inputs, self.params["dtype"])
       attention_bias = tf.cast(attention_bias, self.params["dtype"])
-      with tf.name_scope("shift_targets"):
-        # Shift targets to the right, and remove the last element
-        decoder_inputs = tf.pad(decoder_inputs,
-                                [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
       with tf.name_scope("add_pos_encoding"):
         length = tf.shape(decoder_inputs)[1]
         pos_encoding = self.position_embedding(decoder_inputs)
...
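The hunk above moves the right-shift of the decoder inputs from the embedded vectors onto the raw target ids, so the shift now happens before the shared embedding layer. A toy comparison of the two orderings (not part of the diff; the shapes and the Embedding layer are made up for illustration):

import tensorflow as tf

targets = tf.constant([[7, 2, 9]])                 # [batch, length] token ids
embed = tf.keras.layers.Embedding(16, 4)           # stand-in embedding layer

# New order: shift the ids right, drop the last id, then embed.
shifted_ids = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]
new_inputs = embed(shifted_ids)                    # position 0 = embedding of id 0

# Old order: embed first, then shift the embedding vectors.
old_inputs = tf.pad(embed(targets),
                    [[0, 0], [1, 0], [0, 0]])[:, :-1, :]  # position 0 = all zeros

# Both give shape [batch, length, hidden]; they differ at position 0 unless
# the embedding of id 0 is itself zero, which ties back to the mask lines
# commented out in the embedding layer hunk.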
@@ -15,13 +15,12 @@
 """Forward pass test for Transformer model refactoring."""

 import numpy as np
 import tensorflow as tf

+from official.legacy.transformer import metrics
+from official.legacy.transformer import model_params
+from official.legacy.transformer import transformer
 from official.nlp.modeling import models
-from official.nlp.transformer import metrics
-from official.nlp.transformer import model_params
-from official.nlp.transformer import transformer


 def _count_params(layer, trainable_only=True):
...
@@ -16,10 +16,10 @@
 import tensorflow as tf

-from official.nlp.transformer import attention_layer
-from official.nlp.transformer import embedding_layer
-from official.nlp.transformer import ffn_layer
-from official.nlp.transformer import metrics
+from official.legacy.transformer import attention_layer
+from official.legacy.transformer import embedding_layer
+from official.legacy.transformer import ffn_layer
+from official.legacy.transformer import metrics


 class TransformerLayersTest(tf.test.TestCase):
...
@@ -22,22 +22,25 @@ import os
 import tempfile

 # Import libraries
 from absl import app
 from absl import flags
 from absl import logging
 import tensorflow as tf

 from official.common import distribute_utils
+from official.legacy.transformer import compute_bleu
+from official.legacy.transformer import data_pipeline
+from official.legacy.transformer import metrics
+from official.legacy.transformer import misc
+from official.legacy.transformer import optimizer
+from official.legacy.transformer import transformer
+from official.legacy.transformer import translate
+from official.legacy.transformer.utils import tokenizer
 from official.modeling import performance
-from official.nlp.transformer import compute_bleu
-from official.nlp.transformer import data_pipeline
-from official.nlp.transformer import metrics
-from official.nlp.transformer import misc
-from official.nlp.transformer import optimizer
-from official.nlp.transformer import transformer
-from official.nlp.transformer import translate
-from official.nlp.transformer.utils import tokenizer
 from official.utils.flags import core as flags_core
 from official.utils.misc import keras_utils

 # pylint:disable=logging-format-interpolation
 INF = int(1e9)
@@ -440,7 +443,6 @@ class TransformerTask(object):
     opt = performance.configure_optimizer(
         opt,
         use_float16=params["dtype"] == tf.float16,
-        use_graph_rewrite=self.flags_obj.fp16_implementation == "graph_rewrite",
        loss_scale=flags_core.get_loss_scale(
            self.flags_obj, default_for_fp16="dynamic"))
...
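The second hunk drops the use_graph_rewrite argument, which keyed off self.flags_obj.fp16_implementation == "graph_rewrite" and enabled the deprecated automatic fp16 graph rewrite; what remains is Keras-native mixed precision with loss scaling. A rough sketch of that setup using stock Keras APIs (an illustrative assumption, not code from this repository):

import tensorflow as tf

# Enable mixed precision through the Keras policy rather than a graph rewrite.
tf.keras.mixed_precision.set_global_policy("mixed_float16")

optimizer = tf.keras.optimizers.SGD(0.1)
# Dynamic loss scaling keeps small fp16 gradients from underflowing.
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)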
@@ -23,8 +23,8 @@ from absl import flags
 from absl.testing import flagsaver
 import tensorflow as tf
 from tensorflow.python.eager import context  # pylint: disable=ungrouped-imports

-from official.nlp.transformer import misc
-from official.nlp.transformer import transformer_main
+from official.legacy.transformer import misc
+from official.legacy.transformer import transformer_main

 FLAGS = flags.FLAGS
 FIXED_TIMESTAMP = 'my_time_stamp'
...
@@ -16,8 +16,8 @@
 import tensorflow as tf

-from official.nlp.transformer import model_params
-from official.nlp.transformer import transformer
+from official.legacy.transformer import model_params
+from official.legacy.transformer import transformer


 class TransformerV2Test(tf.test.TestCase):
...
@@ -19,7 +19,7 @@ from absl import logging
 import numpy as np
 import tensorflow as tf

-from official.nlp.transformer.utils import tokenizer
+from official.legacy.transformer.utils import tokenizer

 _EXTRA_DECODE_LENGTH = 100
 _BEAM_SIZE = 4
...
@@ -19,7 +19,7 @@ import tempfile
 import tensorflow as tf

-from official.nlp.transformer.utils import tokenizer
+from official.legacy.transformer.utils import tokenizer


 class SubtokenizerTest(tf.test.TestCase):
...
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for grad_utils."""
import tensorflow as tf
from official.modeling import grad_utils
from official.modeling import performance
class GradUtilsTest(tf.test.TestCase):
def test_minimize(self):
optimizer = tf.keras.optimizers.SGD(0.1)
with tf.GradientTape() as tape:
model = tf.keras.layers.Dense(2)
outputs = model(tf.zeros((2, 2), tf.float32))
loss = tf.reduce_mean(outputs)
grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
model.trainable_variables)
def test_minimize_fp16(self):
optimizer = performance.configure_optimizer(
tf.keras.optimizers.SGD(0.1), use_float16=True)
performance.set_mixed_precision_policy(tf.float16)
with tf.GradientTape() as tape:
model = tf.keras.layers.Dense(2)
outputs = model(tf.zeros((2, 2), tf.float16))
loss = tf.reduce_mean(outputs)
grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
model.trainable_variables)
# Test other fp16 settings.
def _clip_by_global_norm(grads_and_vars):
grads, tvars = list(zip(*grads_and_vars))
(grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
return zip(grads, tvars)
with tf.GradientTape() as tape:
model = tf.keras.layers.Dense(2)
outputs = model(tf.zeros((2, 2), tf.float16))
loss = tf.reduce_mean(outputs)
optimizer = performance.configure_optimizer(
tf.keras.optimizers.SGD(0.1), use_float16=True, loss_scale=128)
grad_utils.minimize_using_explicit_allreduce(
tape,
optimizer,
loss,
model.trainable_variables,
pre_allreduce_callbacks=[_clip_by_global_norm],
post_allreduce_callbacks=[_clip_by_global_norm])
def test_set_mixed_precision_policy(self):
performance.set_mixed_precision_policy(tf.float16)
performance.set_mixed_precision_policy(tf.bfloat16)
performance.set_mixed_precision_policy(tf.float32)
with self.assertRaises(ValueError):
performance.set_mixed_precision_policy(tf.int32)
if __name__ == '__main__':
tf.test.main()
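The tests above call grad_utils.minimize_using_explicit_allreduce with a tape, an optimizer, a loss, and the trainable variables, optionally passing callbacks that run before and after the gradient all-reduce. A hedged sketch of how such a helper is typically driven from a tf.distribute replica function (the strategy, step function, and data here are illustrative assumptions, not taken from the tests):

import tensorflow as tf

from official.modeling import grad_utils

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  model = tf.keras.layers.Dense(2)
  model.build((None, 2))                 # create variables under the strategy scope
  optimizer = tf.keras.optimizers.SGD(0.1)


@tf.function
def train_step(inputs):

  def step_fn(x):
    with tf.GradientTape() as tape:
      loss = tf.reduce_mean(model(x))
    # Gradients are computed per replica, all-reduced explicitly, then applied.
    grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
                                                 model.trainable_variables)
    return loss

  return strategy.run(step_fn, args=(inputs,))


train_step(tf.zeros((4, 2), tf.float32))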