Commit 1862b9c3 authored by Hongkun Yu's avatar Hongkun Yu Committed by A. Unique TensorFlower
Browse files

Move Bert to NLP. Tasks are moved to nlp/bert/

Refactor basic utils to modeling/

PiperOrigin-RevId: 269600561
parent ce237770
......@@ -10,12 +10,16 @@ same speed and performance with each new TensorFlow build.
## Tensorflow releases
The master branch of the models are **in development**, and they target the
The master branch of the models is **in development** with TensorFlow 2.x, and
it targets the
[nightly binaries](https://github.com/tensorflow/tensorflow#installation) built
from the
[master branch of TensorFlow](https://github.com/tensorflow/tensorflow/tree/master).
We aim to keep them backwards compatible with the latest release when possible
(currently TensorFlow 1.5), but we cannot always guarantee compatibility.
or install with pip:
```shell
pip install tf-nightly-2.0-preview
```
**Stable versions** of the official models targeting releases of TensorFlow are
available as tagged branches or
......@@ -53,13 +57,24 @@ installable Official Models package. This is being tracked in
**NOTE:** Please make sure to follow the steps in the
[Requirements](#requirements) section.
* [bert](bert): A powerful pre-trained language representation model: BERT,
which stands for Bidirectional Encoder Representations from Transformers.
* [mnist](mnist): A basic model to classify digits from the MNIST dataset.
* [resnet](vision/image_classification): A deep residual network that can be
used to classify both CIFAR-10 and ImageNet's dataset of 1000 classes.
### Natural Language Processing:
* [bert](nlp/bert): A powerful pre-trained language representation model:
BERT, which stands for Bidirectional Encoder Representations from
Transformers.
* [transformer](transformer): A transformer model to translate the WMT English
to German dataset.
* [xlnet](nlp/xlnet): XLNet: Generalized Autoregressive Pretraining for
  Language Understanding.
### Computer Vision:
* [resnet](vision/image_classification): A deep residual network that can be
used to classify both CIFAR-10 and ImageNet's dataset of 1000 classes.
### Others
* [mnist](mnist): A basic model to classify digits from the MNIST dataset.
* [ncf](recommendation): Neural Collaborative Filtering model for
recommendation tasks.
......
......@@ -29,9 +29,9 @@ from absl.testing import flagsaver
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.bert import modeling
from official.bert import run_classifier
from official.bert.benchmark import benchmark_utils
from official.nlp import bert_modeling as modeling
from official.nlp.bert import run_classifier
from official.utils.misc import distribution_utils
# pylint: disable=line-too-long
......
......@@ -28,9 +28,9 @@ from absl.testing import flagsaver
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.bert import run_squad
from official.bert.benchmark import benchmark_utils
from official.bert.benchmark import squad_evaluate_v1_1
from official.nlp.bert import run_squad
from official.utils.misc import distribution_utils
# pylint: disable=line-too-long
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities to train BERT models."""
"""A light weight utilities to train NLP models."""
from __future__ import absolute_import
from __future__ import division
......@@ -22,7 +22,7 @@ import json
import os
from absl import logging
import tensorflow as tf
import tensorflow.compat.v2 as tf
from official.utils.misc import distribution_utils
from official.utils.misc import tpu_lib
......
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common TF utilities."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import six
import tensorflow.compat.v2 as tf
def pack_inputs(inputs):
  """Packs a list of `inputs` tensors into a tuple.

  Args:
    inputs: a list of tensors.

  Returns:
    a tuple of tensors. if any input is None, replace it with a special constant
    tensor.
  """
  # A scalar int32 zero stands in for None so the result is a flat tuple of
  # real tensors (see `is_special_none_tensor` / `unpack_inputs`).
  return tuple(
      tf.constant(0, shape=[], dtype=tf.int32) if tensor is None else tensor
      for tensor in tf.nest.flatten(inputs))
def unpack_inputs(inputs):
  """Unpacks a tuple of `inputs` tensors to a tuple.

  Args:
    inputs: a list of tensors.

  Returns:
    a tuple of tensors. if any input is a special constant tensor, replace it
    with None.
  """
  unpacked = tuple(
      None if is_special_none_tensor(tensor) else tensor
      for tensor in tf.nest.flatten(inputs))

  # Returning the bare element for a singleton keeps pylint's pointless
  # 'unbalanced-tuple-unpacking' check from firing at call sites.
  if len(unpacked) == 1:
    return unpacked[0]
  return unpacked
def is_special_none_tensor(tensor):
  """Checks if a tensor is a special None Tensor."""
  # The None placeholder produced by `pack_inputs` is a scalar int32; check
  # the rank first so non-scalars short-circuit without inspecting dtype.
  is_scalar = tensor.shape.ndims == 0
  return is_scalar and tensor.dtype == tf.int32
def gelu(x):
  """Gaussian Error Linear Unit.

  This is a smoother version of the RELU.
  Original paper: https://arxiv.org/abs/1606.08415

  Args:
    x: float Tensor to perform activation.

  Returns:
    `x` with the GELU activation applied.
  """
  # tanh-based approximation of GELU; the arithmetic mirrors the paper's
  # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
  inner = math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3))
  cdf = 0.5 * (1.0 + tf.tanh(inner))
  return x * cdf
def get_activation(identifier):
  """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.

  Args:
    identifier: String name of the activation function.

  Returns:
    A Python function corresponding to the activation function. If
    `identifier` is None, empty, or "linear", this will return None.
    If `identifier` is not a string, it will return `identifier`.

  Raises:
    ValueError: The `identifier` does not correspond to a known
      activation.
  """
  if identifier is None:
    return None
  if isinstance(identifier, six.string_types):
    key = str(identifier).lower()
    name_to_fn = {
        "linear": None,
        "relu": tf.nn.relu,
        "gelu": gelu,
        "tanh": tf.nn.tanh,
    }
    if key not in name_to_fn:
      raise ValueError("Unsupported activation function: %s" % (key))
    return name_to_fn[key]
  if callable(identifier):
    return identifier
  raise ValueError("Could not interpret activation "
                   "function identifier: %s" % (identifier))
def get_shape_list(tensor, expected_rank=None, name=None):
  """Returns a list of the shape of tensor, preferring static dimensions.

  Args:
    tensor: A tf.Tensor object to find the shape of.
    expected_rank: (optional) int. The expected rank of `tensor`. If this is
      specified and the `tensor` has a different rank, and exception will be
      thrown.
    name: Optional name of the tensor for the error message.

  Returns:
    A list of dimensions of the shape of tensor. All static dimensions will
    be returned as python integers, and dynamic dimensions will be returned
    as tf.Tensor scalars.
  """
  if expected_rank is not None:
    assert_rank(tensor, expected_rank, name)

  static_shape = tensor.shape.as_list()
  # Dimensions unknown at graph-construction time show up as None.
  dynamic_indexes = [
      index for index, dim in enumerate(static_shape) if dim is None
  ]
  if not dynamic_indexes:
    return static_shape

  # Patch the unknown entries with runtime scalars from tf.shape.
  dynamic_shape = tf.shape(tensor)
  for index in dynamic_indexes:
    static_shape[index] = dynamic_shape[index]
  return static_shape
def assert_rank(tensor, expected_rank, name=None):
  """Raises an exception if the tensor rank is not of the expected rank.

  Args:
    tensor: A tf.Tensor to check the rank of.
    expected_rank: Python integer or list of integers, expected rank.
    name: Optional name of the tensor for the error message.

  Raises:
    ValueError: If the expected shape doesn't match the actual shape.
  """
  # Normalize to a set of acceptable ranks for a single membership test.
  if isinstance(expected_rank, six.integer_types):
    allowed_ranks = {expected_rank}
  else:
    allowed_ranks = set(expected_rank)

  actual_rank = tensor.shape.ndims
  if actual_rank not in allowed_ranks:
    raise ValueError(
        "For the tensor `%s`, the actual tensor rank `%d` (shape = %s) is not "
        "equal to the expected tensor rank `%s`" %
        (name, actual_rank, str(tensor.shape), str(expected_rank)))
......@@ -25,7 +25,7 @@ import os
from absl import logging
import tensorflow as tf
from official.bert import tokenization
from official.nlp.bert import tokenization
class InputExample(object):
......
......@@ -24,8 +24,8 @@ from absl import app
from absl import flags
import tensorflow as tf
from official.bert import classifier_data_lib
from official.bert import squad_lib
from official.nlp.bert import classifier_data_lib
from official.nlp.bert import squad_lib
FLAGS = flags.FLAGS
......
......@@ -27,14 +27,14 @@ from absl import flags
from absl import logging
import tensorflow as tf
# Import BERT model libraries.
from official.bert import bert_models
from official.bert import common_flags
from official.bert import input_pipeline
from official.bert import model_saving_utils
from official.bert import model_training_utils
from official.bert import modeling
from official.bert import optimization
# pylint: disable=g-import-not-at-top,redefined-outer-name,reimported
from official.modeling import model_training_utils
from official.nlp import bert_modeling as modeling
from official.nlp import bert_models
from official.nlp import optimization
from official.nlp.bert import common_flags
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.utils.misc import keras_utils
from official.utils.misc import tpu_lib
......
......@@ -25,14 +25,14 @@ from absl import flags
from absl import logging
import tensorflow as tf
# Import BERT model libraries.
from official.bert import bert_models
from official.bert import common_flags
from official.bert import input_pipeline
from official.bert import model_saving_utils
from official.bert import model_training_utils
from official.bert import modeling
from official.bert import optimization
# pylint: disable=unused-import,g-import-not-at-top,redefined-outer-name,reimported
from official.modeling import model_training_utils
from official.nlp import bert_modeling as modeling
from official.nlp import bert_models
from official.nlp import optimization
from official.nlp.bert import common_flags
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.utils.misc import tpu_lib
flags.DEFINE_string('input_files', None,
......
......@@ -27,16 +27,16 @@ from absl import flags
from absl import logging
import tensorflow as tf
# Import BERT model libraries.
from official.bert import bert_models
from official.bert import common_flags
from official.bert import input_pipeline
from official.bert import model_saving_utils
from official.bert import model_training_utils
from official.bert import modeling
from official.bert import optimization
from official.bert import squad_lib
from official.bert import tokenization
# pylint: disable=unused-import,g-import-not-at-top,redefined-outer-name,reimported
from official.modeling import model_training_utils
from official.nlp import bert_modeling as modeling
from official.nlp import bert_models
from official.nlp import optimization
from official.nlp.bert import common_flags
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.nlp.bert import squad_lib
from official.nlp.bert import tokenization
from official.utils.misc import keras_utils
from official.utils.misc import tpu_lib
......
......@@ -28,8 +28,7 @@ import six
from absl import logging
import tensorflow as tf
from official.bert import tokenization
# pylint: enable=g-bad-import-order
from official.nlp.bert import tokenization
class SquadExample(object):
......
......@@ -16,7 +16,7 @@ r"""Convert checkpoints created by Estimator (tf1) to be Keras compatible.
Keras manages variable names internally, which results in subtly different names
for variables between the Estimator and Keras version.
The script should be ran with TF 1.x.
The script should be used with TF 1.x.
Usage:
......@@ -29,7 +29,7 @@ from __future__ import division
from __future__ import print_function
from absl import app
import tensorflow as tf
import tensorflow as tf # TF 1.x
flags = tf.flags
......
......@@ -26,8 +26,8 @@ from __future__ import print_function
from absl import app
from absl import flags
import tensorflow as tf
from official.bert import modeling
import tensorflow as tf # TF 1.x
from official.nlp import bert_modeling as modeling
FLAGS = flags.FLAGS
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment