Unverified Commit 09d9656f authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'panoptic-segmentation' into panoptic-deeplab-modeling

parents ac671306 49a5706c
@@ -76,8 +76,8 @@ class EmbeddingSharedWeights(tf.keras.layers.Layer):
     with tf.name_scope("embedding"):
       # Create binary mask of size [batch_size, length]
       embeddings = tf.gather(self.shared_weights, inputs)
-      mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype)
-      embeddings *= tf.expand_dims(mask, -1)
+      # mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype)
+      # embeddings *= tf.expand_dims(mask, -1)
       # Scale embedding by the sqrt of the hidden size
       embeddings *= self.hidden_size**0.5
...
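The two lines commented out above zeroed out the embedding vectors at padded positions (token id 0). Below is a minimal standalone sketch, with made-up toy shapes rather than anything from this diff, of what that mask computed and what changes once it is disabled:

import tensorflow as tf

# Toy stand-ins for self.shared_weights and the integer input ids.
shared_weights = tf.random.normal([8, 4])        # [vocab_size, hidden_size]
inputs = tf.constant([[5, 3, 0, 0]])             # id 0 marks padding

embeddings = tf.gather(shared_weights, inputs)   # [1, 4, 4]

# What the disabled lines did: zero the vectors at padded positions.
mask = tf.cast(tf.not_equal(inputs, 0), embeddings.dtype)
masked = embeddings * tf.expand_dims(mask, -1)

# With the mask commented out, padded positions keep row 0 of the embedding
# table (scaled like every other position) instead of an all-zeros vector.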
@@ -19,7 +19,7 @@
 from absl import flags
 import tensorflow as tf

-from official.nlp.transformer import model_params
+from official.legacy.transformer import model_params
 from official.utils.flags import core as flags_core
 from official.utils.misc import keras_utils
...
@@ -16,7 +16,7 @@
 import tensorflow as tf

-from official.nlp.transformer import model_utils
+from official.legacy.transformer import model_utils

 NEG_INF = -1e9
...
@@ -19,14 +19,15 @@ Transformer model code source: https://github.com/tensorflow/tensor2tensor
 """
 import tensorflow as tf

+from official.legacy.transformer import attention_layer
+from official.legacy.transformer import embedding_layer
+from official.legacy.transformer import ffn_layer
+from official.legacy.transformer import metrics
+from official.legacy.transformer import model_utils
+from official.legacy.transformer.utils.tokenizer import EOS_ID
 from official.nlp.modeling.layers import position_embedding
 from official.nlp.modeling.ops import beam_search
-from official.nlp.transformer import attention_layer
-from official.nlp.transformer import embedding_layer
-from official.nlp.transformer import ffn_layer
-from official.nlp.transformer import metrics
-from official.nlp.transformer import model_utils
-from official.nlp.transformer.utils.tokenizer import EOS_ID

 # Disable the not-callable lint error, since it claims many objects are not
 # callable when they actually are.
@@ -196,13 +197,12 @@ class Transformer(tf.keras.Model):
     with tf.name_scope("decode"):
       # Prepare inputs to decoder layers by shifting targets, adding positional
       # encoding and applying dropout.
+      with tf.name_scope("shift_targets"):
+        # Shift targets to the right, and remove the last element
+        targets = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]
       decoder_inputs = self.embedding_softmax_layer(targets)
       decoder_inputs = tf.cast(decoder_inputs, self.params["dtype"])
       attention_bias = tf.cast(attention_bias, self.params["dtype"])
-      with tf.name_scope("shift_targets"):
-        # Shift targets to the right, and remove the last element
-        decoder_inputs = tf.pad(decoder_inputs,
-                                [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
       with tf.name_scope("add_pos_encoding"):
         length = tf.shape(decoder_inputs)[1]
         pos_encoding = self.position_embedding(decoder_inputs)
...
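The hunk above moves the right-shift of the decoder inputs from the embedded vectors onto the raw target ids, so the shift now happens before the shared embedding layer. A toy comparison of the two orderings (not part of the diff; the shapes and the Embedding layer are made up for illustration):

import tensorflow as tf

targets = tf.constant([[7, 2, 9]])                 # [batch, length] token ids
embed = tf.keras.layers.Embedding(16, 4)           # stand-in embedding layer

# New order: shift the ids right, drop the last id, then embed.
shifted_ids = tf.pad(targets, [[0, 0], [1, 0]])[:, :-1]
new_inputs = embed(shifted_ids)                    # position 0 = embedding of id 0

# Old order: embed first, then shift the embedding vectors.
old_inputs = tf.pad(embed(targets),
                    [[0, 0], [1, 0], [0, 0]])[:, :-1, :]  # position 0 = all zeros

# Both give shape [batch, length, hidden]; they differ at position 0 unless
# the embedding of id 0 is itself zero, which ties back to the mask lines
# commented out in the embedding layer hunk.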
@@ -15,13 +15,12 @@
 """Forward pass test for Transformer model refactoring."""

 import numpy as np
 import tensorflow as tf

+from official.legacy.transformer import metrics
+from official.legacy.transformer import model_params
+from official.legacy.transformer import transformer
 from official.nlp.modeling import models
-from official.nlp.transformer import metrics
-from official.nlp.transformer import model_params
-from official.nlp.transformer import transformer


 def _count_params(layer, trainable_only=True):
...
@@ -16,10 +16,10 @@
 import tensorflow as tf

-from official.nlp.transformer import attention_layer
-from official.nlp.transformer import embedding_layer
-from official.nlp.transformer import ffn_layer
-from official.nlp.transformer import metrics
+from official.legacy.transformer import attention_layer
+from official.legacy.transformer import embedding_layer
+from official.legacy.transformer import ffn_layer
+from official.legacy.transformer import metrics


 class TransformerLayersTest(tf.test.TestCase):
...
@@ -22,22 +22,25 @@ import os
 import tempfile

 # Import libraries
 from absl import app
 from absl import flags
 from absl import logging
 import tensorflow as tf

 from official.common import distribute_utils
+from official.legacy.transformer import compute_bleu
+from official.legacy.transformer import data_pipeline
+from official.legacy.transformer import metrics
+from official.legacy.transformer import misc
+from official.legacy.transformer import optimizer
+from official.legacy.transformer import transformer
+from official.legacy.transformer import translate
+from official.legacy.transformer.utils import tokenizer
 from official.modeling import performance
-from official.nlp.transformer import compute_bleu
-from official.nlp.transformer import data_pipeline
-from official.nlp.transformer import metrics
-from official.nlp.transformer import misc
-from official.nlp.transformer import optimizer
-from official.nlp.transformer import transformer
-from official.nlp.transformer import translate
-from official.nlp.transformer.utils import tokenizer
 from official.utils.flags import core as flags_core
 from official.utils.misc import keras_utils

 # pylint:disable=logging-format-interpolation
 INF = int(1e9)
@@ -440,7 +443,6 @@ class TransformerTask(object):
     opt = performance.configure_optimizer(
         opt,
         use_float16=params["dtype"] == tf.float16,
-        use_graph_rewrite=self.flags_obj.fp16_implementation == "graph_rewrite",
        loss_scale=flags_core.get_loss_scale(
            self.flags_obj, default_for_fp16="dynamic"))
...
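The second hunk drops the use_graph_rewrite argument, which keyed off self.flags_obj.fp16_implementation == "graph_rewrite" and enabled the deprecated automatic fp16 graph rewrite; what remains is Keras-native mixed precision with loss scaling. A rough sketch of that setup using stock Keras APIs (an illustrative assumption, not code from this repository):

import tensorflow as tf

# Enable mixed precision through the Keras policy rather than a graph rewrite.
tf.keras.mixed_precision.set_global_policy("mixed_float16")

optimizer = tf.keras.optimizers.SGD(0.1)
# Dynamic loss scaling keeps small fp16 gradients from underflowing.
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(optimizer)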
@@ -23,8 +23,8 @@ from absl import flags
 from absl.testing import flagsaver
 import tensorflow as tf
 from tensorflow.python.eager import context  # pylint: disable=ungrouped-imports

-from official.nlp.transformer import misc
-from official.nlp.transformer import transformer_main
+from official.legacy.transformer import misc
+from official.legacy.transformer import transformer_main

 FLAGS = flags.FLAGS
 FIXED_TIMESTAMP = 'my_time_stamp'
...
@@ -16,8 +16,8 @@
 import tensorflow as tf

-from official.nlp.transformer import model_params
-from official.nlp.transformer import transformer
+from official.legacy.transformer import model_params
+from official.legacy.transformer import transformer


 class TransformerV2Test(tf.test.TestCase):
...
@@ -19,7 +19,7 @@ from absl import logging
 import numpy as np
 import tensorflow as tf

-from official.nlp.transformer.utils import tokenizer
+from official.legacy.transformer.utils import tokenizer

 _EXTRA_DECODE_LENGTH = 100
 _BEAM_SIZE = 4
...
@@ -19,7 +19,7 @@ import tempfile
 import tensorflow as tf

-from official.nlp.transformer.utils import tokenizer
+from official.legacy.transformer.utils import tokenizer


 class SubtokenizerTest(tf.test.TestCase):
...
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for grad_utils."""
import tensorflow as tf
from official.modeling import grad_utils
from official.modeling import performance
class GradUtilsTest(tf.test.TestCase):
def test_minimize(self):
optimizer = tf.keras.optimizers.SGD(0.1)
with tf.GradientTape() as tape:
model = tf.keras.layers.Dense(2)
outputs = model(tf.zeros((2, 2), tf.float32))
loss = tf.reduce_mean(outputs)
grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
model.trainable_variables)
def test_minimize_fp16(self):
optimizer = performance.configure_optimizer(
tf.keras.optimizers.SGD(0.1), use_float16=True)
performance.set_mixed_precision_policy(tf.float16)
with tf.GradientTape() as tape:
model = tf.keras.layers.Dense(2)
outputs = model(tf.zeros((2, 2), tf.float16))
loss = tf.reduce_mean(outputs)
grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
model.trainable_variables)
# Test other fp16 settings.
def _clip_by_global_norm(grads_and_vars):
grads, tvars = list(zip(*grads_and_vars))
(grads, _) = tf.clip_by_global_norm(grads, clip_norm=1.0)
return zip(grads, tvars)
with tf.GradientTape() as tape:
model = tf.keras.layers.Dense(2)
outputs = model(tf.zeros((2, 2), tf.float16))
loss = tf.reduce_mean(outputs)
optimizer = performance.configure_optimizer(
tf.keras.optimizers.SGD(0.1), use_float16=True, loss_scale=128)
grad_utils.minimize_using_explicit_allreduce(
tape,
optimizer,
loss,
model.trainable_variables,
pre_allreduce_callbacks=[_clip_by_global_norm],
post_allreduce_callbacks=[_clip_by_global_norm])
def test_set_mixed_precision_policy(self):
performance.set_mixed_precision_policy(tf.float16)
performance.set_mixed_precision_policy(tf.bfloat16)
performance.set_mixed_precision_policy(tf.float32)
with self.assertRaises(ValueError):
performance.set_mixed_precision_policy(tf.int32)
if __name__ == '__main__':
tf.test.main()
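The tests above call grad_utils.minimize_using_explicit_allreduce with a tape, an optimizer, a loss, and the trainable variables, optionally passing callbacks that run before and after the gradient all-reduce. A hedged sketch of how such a helper is typically driven from a tf.distribute replica function (the strategy, step function, and data here are illustrative assumptions, not taken from the tests):

import tensorflow as tf

from official.modeling import grad_utils

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
  model = tf.keras.layers.Dense(2)
  model.build((None, 2))                 # create variables under the strategy scope
  optimizer = tf.keras.optimizers.SGD(0.1)


@tf.function
def train_step(inputs):

  def step_fn(x):
    with tf.GradientTape() as tape:
      loss = tf.reduce_mean(model(x))
    # Gradients are computed per replica, all-reduced explicitly, then applied.
    grad_utils.minimize_using_explicit_allreduce(tape, optimizer, loss,
                                                 model.trainable_variables)
    return loss

  return strategy.run(step_fn, args=(inputs,))


train_step(tf.zeros((4, 2), tf.float32))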