Commit 1862b9c3 authored by Hongkun Yu's avatar Hongkun Yu Committed by A. Unique TensorFlower
Browse files

Move Bert to NLP. Tasks are moved to nlp/bert/

Refactor basic utils to modeling/

PiperOrigin-RevId: 269600561
parent ce237770
......@@ -10,12 +10,16 @@ same speed and performance with each new TensorFlow build.
## Tensorflow releases
The master branch of the models are **in development**, and they target the
The master branch of the models is **in development** with TensorFlow 2.x, and
it targets the
[nightly binaries](https://github.com/tensorflow/tensorflow#installation) built
from the
[master branch of TensorFlow](https://github.com/tensorflow/tensorflow/tree/master).
We aim to keep them backwards compatible with the latest release when possible
(currently TensorFlow 1.5), but we cannot always guarantee compatibility.
or install with pip:
```shell
pip install tf-nightly-2.0-preview
```
**Stable versions** of the official models targeting releases of TensorFlow are
available as tagged branches or
......@@ -53,13 +57,24 @@ installable Official Models package. This is being tracked in
**NOTE:** Please make sure to follow the steps in the
[Requirements](#requirements) section.
* [bert](bert): A powerful pre-trained language representation model: BERT,
which stands for Bidirectional Encoder Representations from Transformers.
* [mnist](mnist): A basic model to classify digits from the MNIST dataset.
* [resnet](vision/image_classification): A deep residual network that can be
used to classify both CIFAR-10 and ImageNet's dataset of 1000 classes.
### Natural Language Processing:
* [bert](nlp/bert): A powerful pre-trained language representation model:
BERT, which stands for Bidirectional Encoder Representations from
Transformers.
* [transformer](transformer): A transformer model to translate the WMT English
to German dataset.
* [xlnet](nlp/xlnet): XLNet: Generalized Autoregressive Pretraining for
  Language Understanding.
### Computer Vision:
* [resnet](vision/image_classification): A deep residual network that can be
used to classify both CIFAR-10 and ImageNet's dataset of 1000 classes.
### Others
* [mnist](mnist): A basic model to classify digits from the MNIST dataset.
* [ncf](recommendation): Neural Collaborative Filtering model for
recommendation tasks.
......
......@@ -29,9 +29,9 @@ from absl.testing import flagsaver
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.bert import modeling
from official.bert import run_classifier
from official.bert.benchmark import benchmark_utils
from official.nlp import bert_modeling as modeling
from official.nlp.bert import run_classifier
from official.utils.misc import distribution_utils
# pylint: disable=line-too-long
......
......@@ -28,9 +28,9 @@ from absl.testing import flagsaver
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.bert import run_squad
from official.bert.benchmark import benchmark_utils
from official.bert.benchmark import squad_evaluate_v1_1
from official.nlp.bert import run_squad
from official.utils.misc import distribution_utils
# pylint: disable=line-too-long
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities to train BERT models."""
"""A light weight utilities to train NLP models."""
from __future__ import absolute_import
from __future__ import division
......@@ -22,7 +22,7 @@ import json
import os
from absl import logging
import tensorflow as tf
import tensorflow.compat.v2 as tf
from official.utils.misc import distribution_utils
from official.utils.misc import tpu_lib
......
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Common TF utilities."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import six
import tensorflow.compat.v2 as tf
def pack_inputs(inputs):
  """Packs a list of `inputs` tensors into a tuple.

  Args:
    inputs: a list of tensors.

  Returns:
    a tuple of tensors. if any input is None, replace it with a special constant
    tensor.
  """
  # A scalar int32 zero stands in for None so the result is a flat tuple of
  # real tensors (see `is_special_none_tensor` / `unpack_inputs`).
  return tuple(
      tf.constant(0, shape=[], dtype=tf.int32) if tensor is None else tensor
      for tensor in tf.nest.flatten(inputs))
def unpack_inputs(inputs):
  """Unpacks a tuple of `inputs` tensors to a tuple.

  Args:
    inputs: a list of tensors.

  Returns:
    a tuple of tensors. if any input is a special constant tensor, replace it
    with None.
  """
  unpacked = tuple(
      None if is_special_none_tensor(tensor) else tensor
      for tensor in tf.nest.flatten(inputs))

  # Returning the bare element for a singleton keeps pylint's pointless
  # 'unbalanced-tuple-unpacking' check from firing at call sites.
  if len(unpacked) == 1:
    return unpacked[0]
  return unpacked
def is_special_none_tensor(tensor):
  """Checks if a tensor is a special None Tensor."""
  # The None placeholder produced by `pack_inputs` is a scalar int32; check
  # the rank first so non-scalars short-circuit without inspecting dtype.
  is_scalar = tensor.shape.ndims == 0
  return is_scalar and tensor.dtype == tf.int32
def gelu(x):
  """Gaussian Error Linear Unit.

  This is a smoother version of the RELU.
  Original paper: https://arxiv.org/abs/1606.08415

  Args:
    x: float Tensor to perform activation.

  Returns:
    `x` with the GELU activation applied.
  """
  # tanh-based approximation of GELU; the arithmetic mirrors the paper's
  # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3))).
  inner = math.sqrt(2 / math.pi) * (x + 0.044715 * tf.pow(x, 3))
  cdf = 0.5 * (1.0 + tf.tanh(inner))
  return x * cdf
def get_activation(identifier):
  """Maps a string to a Python function, e.g., "relu" => `tf.nn.relu`.

  Args:
    identifier: String name of the activation function.

  Returns:
    A Python function corresponding to the activation function. If
    `identifier` is None, empty, or "linear", this will return None.
    If `identifier` is not a string, it will return `identifier`.

  Raises:
    ValueError: The `identifier` does not correspond to a known
      activation.
  """
  if identifier is None:
    return None
  if isinstance(identifier, six.string_types):
    key = str(identifier).lower()
    name_to_fn = {
        "linear": None,
        "relu": tf.nn.relu,
        "gelu": gelu,
        "tanh": tf.nn.tanh,
    }
    if key not in name_to_fn:
      raise ValueError("Unsupported activation function: %s" % (key))
    return name_to_fn[key]
  if callable(identifier):
    return identifier
  raise ValueError("Could not interpret activation "
                   "function identifier: %s" % (identifier))
def get_shape_list(tensor, expected_rank=None, name=None):
  """Returns a list of the shape of tensor, preferring static dimensions.

  Args:
    tensor: A tf.Tensor object to find the shape of.
    expected_rank: (optional) int. The expected rank of `tensor`. If this is
      specified and the `tensor` has a different rank, and exception will be
      thrown.
    name: Optional name of the tensor for the error message.

  Returns:
    A list of dimensions of the shape of tensor. All static dimensions will
    be returned as python integers, and dynamic dimensions will be returned
    as tf.Tensor scalars.
  """
  if expected_rank is not None:
    assert_rank(tensor, expected_rank, name)

  static_shape = tensor.shape.as_list()
  # Dimensions unknown at graph-construction time show up as None.
  dynamic_indexes = [
      index for index, dim in enumerate(static_shape) if dim is None
  ]
  if not dynamic_indexes:
    return static_shape

  # Patch the unknown entries with runtime scalars from tf.shape.
  dynamic_shape = tf.shape(tensor)
  for index in dynamic_indexes:
    static_shape[index] = dynamic_shape[index]
  return static_shape
def assert_rank(tensor, expected_rank, name=None):
  """Raises an exception if the tensor rank is not of the expected rank.

  Args:
    tensor: A tf.Tensor to check the rank of.
    expected_rank: Python integer or list of integers, expected rank.
    name: Optional name of the tensor for the error message.

  Raises:
    ValueError: If the expected shape doesn't match the actual shape.
  """
  # Normalize to a set of acceptable ranks for a single membership test.
  if isinstance(expected_rank, six.integer_types):
    allowed_ranks = {expected_rank}
  else:
    allowed_ranks = set(expected_rank)

  actual_rank = tensor.shape.ndims
  if actual_rank not in allowed_ranks:
    raise ValueError(
        "For the tensor `%s`, the actual tensor rank `%d` (shape = %s) is not "
        "equal to the expected tensor rank `%s`" %
        (name, actual_rank, str(tensor.shape), str(expected_rank)))
......@@ -25,7 +25,7 @@ import os
from absl import logging
import tensorflow as tf
from official.bert import tokenization
from official.nlp.bert import tokenization
class InputExample(object):
......
......@@ -24,8 +24,8 @@ from absl import app
from absl import flags
import tensorflow as tf
from official.bert import classifier_data_lib
from official.bert import squad_lib
from official.nlp.bert import classifier_data_lib
from official.nlp.bert import squad_lib
FLAGS = flags.FLAGS
......
......@@ -27,14 +27,14 @@ from absl import flags
from absl import logging
import tensorflow as tf
# Import BERT model libraries.
from official.bert import bert_models
from official.bert import common_flags
from official.bert import input_pipeline
from official.bert import model_saving_utils
from official.bert import model_training_utils
from official.bert import modeling
from official.bert import optimization
# pylint: disable=g-import-not-at-top,redefined-outer-name,reimported
from official.modeling import model_training_utils
from official.nlp import bert_modeling as modeling
from official.nlp import bert_models
from official.nlp import optimization
from official.nlp.bert import common_flags
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.utils.misc import keras_utils
from official.utils.misc import tpu_lib
......
......@@ -25,14 +25,14 @@ from absl import flags
from absl import logging
import tensorflow as tf
# Import BERT model libraries.
from official.bert import bert_models
from official.bert import common_flags
from official.bert import input_pipeline
from official.bert import model_saving_utils
from official.bert import model_training_utils
from official.bert import modeling
from official.bert import optimization
# pylint: disable=unused-import,g-import-not-at-top,redefined-outer-name,reimported
from official.modeling import model_training_utils
from official.nlp import bert_modeling as modeling
from official.nlp import bert_models
from official.nlp import optimization
from official.nlp.bert import common_flags
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.utils.misc import tpu_lib
flags.DEFINE_string('input_files', None,
......
......@@ -27,16 +27,16 @@ from absl import flags
from absl import logging
import tensorflow as tf
# Import BERT model libraries.
from official.bert import bert_models
from official.bert import common_flags
from official.bert import input_pipeline
from official.bert import model_saving_utils
from official.bert import model_training_utils
from official.bert import modeling
from official.bert import optimization
from official.bert import squad_lib
from official.bert import tokenization
# pylint: disable=unused-import,g-import-not-at-top,redefined-outer-name,reimported
from official.modeling import model_training_utils
from official.nlp import bert_modeling as modeling
from official.nlp import bert_models
from official.nlp import optimization
from official.nlp.bert import common_flags
from official.nlp.bert import input_pipeline
from official.nlp.bert import model_saving_utils
from official.nlp.bert import squad_lib
from official.nlp.bert import tokenization
from official.utils.misc import keras_utils
from official.utils.misc import tpu_lib
......
......@@ -28,8 +28,7 @@ import six
from absl import logging
import tensorflow as tf
from official.bert import tokenization
# pylint: enable=g-bad-import-order
from official.nlp.bert import tokenization
class SquadExample(object):
......
......@@ -16,7 +16,7 @@ r"""Convert checkpoints created by Estimator (tf1) to be Keras compatible.
Keras manages variable names internally, which results in subtly different names
for variables between the Estimator and Keras version.
The script should be ran with TF 1.x.
The script should be used with TF 1.x.
Usage:
......@@ -29,7 +29,7 @@ from __future__ import division
from __future__ import print_function
from absl import app
import tensorflow as tf
import tensorflow as tf # TF 1.x
flags = tf.flags
......
......@@ -26,8 +26,8 @@ from __future__ import print_function
from absl import app
from absl import flags
import tensorflow as tf
from official.bert import modeling
import tensorflow as tf # TF 1.x
from official.nlp import bert_modeling as modeling
FLAGS = flags.FLAGS
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment