Update code to v2.11.0

32e4ca51 · qianyj · 9485aa1d · 71060f67 · 32e4ca51 · 9485aa1d
Commit 32e4ca51 authored Nov 28, 2023 by qianyj
20 changed files
--- a/official/legacy/image_classification/vgg/vgg_model.py
+++ b/official/legacy/image_classification/vgg/vgg_model.py
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""VGG16 model for Keras.
+
+Adapted from tf.keras.applications.vgg16.VGG16().
+
+Related papers/blogs:
+- https://arxiv.org/abs/1409.1556
+"""
+
+import tensorflow as tf
+
+layers = tf.keras.layers
+
+
+def _gen_l2_regularizer(use_l2_regularizer=True, l2_weight_decay=1e-4):
+  return tf.keras.regularizers.L2(
+      l2_weight_decay) if use_l2_regularizer else None
+
+
+def vgg16(num_classes,
+          batch_size=None,
+          use_l2_regularizer=True,
+          batch_norm_decay=0.9,
+          batch_norm_epsilon=1e-5):
+  """Instantiates the VGG16 architecture.
+
+  Args:
+    num_classes: `int` number of classes for image classification.
+    batch_size: Size of the batches for each step.
+    use_l2_regularizer: whether to use L2 regularizer on Conv/Dense layer.
+    batch_norm_decay: Momentum of batch norm layers.
+    batch_norm_epsilon: Epsilon of batch norm layers.
+
+  Returns:
+    A Keras model instance.
+
+  """
+  input_shape = (224, 224, 3)
+  img_input = layers.Input(shape=input_shape, batch_size=batch_size)
+
+  x = img_input
+
+  if tf.keras.backend.image_data_format() == 'channels_first':
+    x = layers.Permute((3, 1, 2))(x)
+    bn_axis = 1
+  else:  # channels_last
+    bn_axis = 3
+  # Block 1
+  x = layers.Conv2D(
+      64, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block1_conv1')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv1')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(
+      64, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block1_conv2')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv2')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
+
+  # Block 2
+  x = layers.Conv2D(
+      128, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block2_conv1')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv3')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(
+      128, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block2_conv2')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv4')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
+
+  # Block 3
+  x = layers.Conv2D(
+      256, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block3_conv1')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv5')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(
+      256, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block3_conv2')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv6')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(
+      256, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block3_conv3')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv7')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
+
+  # Block 4
+  x = layers.Conv2D(
+      512, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block4_conv1')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv8')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(
+      512, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block4_conv2')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv9')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(
+      512, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block4_conv3')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv10')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
+
+  # Block 5
+  x = layers.Conv2D(
+      512, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block5_conv1')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv11')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(
+      512, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block5_conv2')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv12')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(
+      512, (3, 3),
+      padding='same',
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='block5_conv3')(
+          x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv13')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
+
+  x = layers.Flatten(name='flatten')(x)
+  x = layers.Dense(
+      4096,
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='fc1')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Dropout(0.5)(x)
+  x = layers.Dense(
+      4096,
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='fc2')(
+          x)
+  x = layers.Activation('relu')(x)
+  x = layers.Dropout(0.5)(x)
+  x = layers.Dense(
+      num_classes,
+      kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+      name='fc1000')(
+          x)
+
+  x = layers.Activation('softmax', dtype='float32')(x)
+
+  # Create model.
+  return tf.keras.Model(img_input, x, name='vgg16')
--- a/official/legacy/nlp/albert/__init__.py
+++ b/official/legacy/nlp/albert/__init__.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
--- a/official/legacy/nlp/albert/configs.py
+++ b/official/legacy/nlp/albert/configs.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""The ALBERT configurations."""
-
-import six
-
-from official.nlp.bert import configs
-
-
-class AlbertConfig(configs.BertConfig):
-  """Configuration for `ALBERT`."""
-
-  def __init__(self, num_hidden_groups=1, inner_group_num=1, **kwargs):
-    """Constructs AlbertConfig.
-
-    Args:
-      num_hidden_groups: Number of group for the hidden layers, parameters in
-        the same group are shared. Note that this value and also the following
-        'inner_group_num' has to be 1 for now, because all released ALBERT
-        models set them to 1. We may support arbitary valid values in future.
-      inner_group_num: Number of inner repetition of attention and ffn.
-      **kwargs: The remaining arguments are the same as above 'BertConfig'.
-    """
-    super(AlbertConfig, self).__init__(**kwargs)
-
-    # TODO(chendouble): 'inner_group_num' and 'num_hidden_groups' are always 1
-    # in the released ALBERT. Support other values in AlbertEncoder if needed.
-    if inner_group_num != 1 or num_hidden_groups != 1:
-      raise ValueError("We only support 'inner_group_num' and "
-                       "'num_hidden_groups' as 1.")
-
-  @classmethod
-  def from_dict(cls, json_object):
-    """Constructs a `AlbertConfig` from a Python dictionary of parameters."""
-    config = AlbertConfig(vocab_size=None)
-    for (key, value) in six.iteritems(json_object):
-      config.__dict__[key] = value
-    return config
--- a/official/legacy/transformer/__init__.py
+++ b/official/legacy/transformer/__init__.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/attention_layer.py
+++ b/official/legacy/transformer/attention_layer.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@ import math

 import tensorflow as tf

+from official.modeling import tf_utils
+

 class Attention(tf.keras.layers.Layer):
  """Multi-headed attention layer."""
@@ -50,27 +52,27 @@ class Attention(tf.keras.layers.Layer):

    attention_initializer = _glorot_initializer(input_shape.as_list()[-1],
                                                self.hidden_size)
-    self.query_dense_layer = tf.keras.layers.experimental.EinsumDense(
+    self.query_dense_layer = tf.keras.layers.EinsumDense(
        "BTE,ENH->BTNH",
        output_shape=(None, self.num_heads, size_per_head),
-        kernel_initializer=attention_initializer,
+        kernel_initializer=tf_utils.clone_initializer(attention_initializer),
        bias_axes=None,
        name="query")
-    self.key_dense_layer = tf.keras.layers.experimental.EinsumDense(
+    self.key_dense_layer = tf.keras.layers.EinsumDense(
        "BTE,ENH->BTNH",
        output_shape=(None, self.num_heads, size_per_head),
-        kernel_initializer=attention_initializer,
+        kernel_initializer=tf_utils.clone_initializer(attention_initializer),
        bias_axes=None,
        name="key")
-    self.value_dense_layer = tf.keras.layers.experimental.EinsumDense(
+    self.value_dense_layer = tf.keras.layers.EinsumDense(
        "BTE,ENH->BTNH",
        output_shape=(None, self.num_heads, size_per_head),
-        kernel_initializer=attention_initializer,
+        kernel_initializer=tf_utils.clone_initializer(attention_initializer),
        bias_axes=None,
        name="value")

    output_initializer = _glorot_initializer(self.hidden_size, self.hidden_size)
-    self.output_dense_layer = tf.keras.layers.experimental.EinsumDense(
+    self.output_dense_layer = tf.keras.layers.EinsumDense(
        "BTNH,NHE->BTE",
        output_shape=(None, self.hidden_size),
        kernel_initializer=output_initializer,

--- a/official/legacy/transformer/beam_search_v1.py
+++ b/official/legacy/transformer/beam_search_v1.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/compute_bleu.py
+++ b/official/legacy/transformer/compute_bleu.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/compute_bleu_test.py
+++ b/official/legacy/transformer/compute_bleu_test.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/data_download.py
+++ b/official/legacy/transformer/data_download.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -188,7 +188,7 @@ def download_and_extract(path, url, input_filename, target_filename):
    Full paths to extracted input and target files.

  Raises:
-    OSError: if the the download/extraction fails.
+    OSError: if the download/extraction fails.
  """
  # Check if extracted files already exist in path
  input_file = find_file(path, input_filename)

--- a/official/legacy/transformer/data_pipeline.py
+++ b/official/legacy/transformer/data_pipeline.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/embedding_layer.py
+++ b/official/legacy/transformer/embedding_layer.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/ffn_layer.py
+++ b/official/legacy/transformer/ffn_layer.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/metrics.py
+++ b/official/legacy/transformer/metrics.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/misc.py
+++ b/official/legacy/transformer/misc.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/model_params.py
+++ b/official/legacy/transformer/model_params.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/model_utils.py
+++ b/official/legacy/transformer/model_utils.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/model_utils_test.py
+++ b/official/legacy/transformer/model_utils_test.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/optimizer.py
+++ b/official/legacy/transformer/optimizer.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/transformer.py
+++ b/official/legacy/transformer/transformer.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

--- a/official/legacy/transformer/transformer_forward_test.py
+++ b/official/legacy/transformer/transformer_forward_test.py
-# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.