Unverified commit 054d11f5 authored by Srihari Humbarwadi, committed by GitHub

Merge branch 'tensorflow:master' into panoptic-deeplab

parents 14f4c9df db3eead9
......@@ -237,20 +237,21 @@ class TrainerConfig(base_config.Config):
# we will restore the model states.
recovery_max_trials: int = 0
validation_summary_subdir: str = "validation"
@dataclasses.dataclass
class TaskConfig(base_config.Config):
"""Config passed to task."""
init_checkpoint: str = ""
model: Optional[base_config.Config] = None
train_data: DataConfig = DataConfig()
validation_data: DataConfig = DataConfig()
name: Optional[str] = None
# Configs for differential privacy
# These configs are only effective if you use create_optimizer in
# tensorflow_models/official/core/base_task.py
differential_privacy_config: Optional[
dp_configs.DifferentialPrivacyConfig] = None
@dataclasses.dataclass
......
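As orientation to the change above: the differential privacy settings now hang off the task config rather than the trainer config. A minimal sketch of wiring them up, assuming the `TaskConfig` and `DifferentialPrivacyConfig` classes from this diff (the `clipping_norm`/`noise_multiplier` field names match the config test further below):

```python
from official.core import config_definitions as cfg
from official.modeling.privacy import configs as dp_configs

# Minimal sketch; the values here are illustrative, not recommended settings.
task = cfg.TaskConfig(
    differential_privacy_config=dp_configs.DifferentialPrivacyConfig(
        clipping_norm=1.0,      # per-example gradient clipping norm
        noise_multiplier=1.0,   # noise scale relative to the clipping norm
    ))
```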
......@@ -214,11 +214,15 @@ def create_optimizer(task: base_task.Task,
) -> tf.keras.optimizers.Optimizer:
"""A create optimizer util to be backward compatability with new args."""
if 'dp_config' in inspect.signature(task.create_optimizer).parameters:
dp_config = None
if hasattr(params.task, 'differential_privacy_config'):
dp_config = params.task.differential_privacy_config
optimizer = task.create_optimizer(
params.trainer.optimizer_config, params.runtime,
dp_config=dp_config)
else:
if hasattr(params.task, 'differential_privacy_config'
) and params.task.differential_privacy_config is not None:
raise ValueError('Differential privacy config is specified but '
'task.create_optimizer api does not accept it.')
optimizer = task.create_optimizer(
......
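The hunk above is a common backward-compatibility idiom: inspect the callee's signature and forward the new keyword argument only if it is declared. A standalone sketch of the pattern (the function and argument names here are illustrative, not the Model Garden API):

```python
import inspect

def call_with_optional_dp(create_optimizer, *args, dp_config=None):
  """Forwards dp_config only if the callee's signature declares it."""
  if 'dp_config' in inspect.signature(create_optimizer).parameters:
    return create_optimizer(*args, dp_config=dp_config)
  if dp_config is not None:
    raise ValueError('dp_config is specified but the callee does not '
                     'accept it.')
  return create_optimizer(*args)
```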
Models in this `legacy` directory are mainly used for benchmarking.
Please note that the models in this `legacy` directory are not supported to
the same extent as the models in official/nlp and official/vision.
......@@ -22,7 +22,7 @@ import collections
import tensorflow as tf
from official.legacy.detection.utils import box_utils
from official.vision.ops import iou_similarity
from official.vision.utils.object_detection import argmax_matcher
from official.vision.utils.object_detection import balanced_positive_negative_sampler
from official.vision.utils.object_detection import box_list
......
# Image Classification
**Warning:** the features in the `image_classification/` directory have been
fully integrated into the [new code base](https://github.com/tensorflow/models/tree/benchmark/official/vision/modeling/backbones).
This folder contains TF 2 model examples for image classification:
* [MNIST](#mnist)
* [Classifier Trainer](#classifier-trainer), a framework that uses the Keras
......@@ -17,8 +17,7 @@ For more information about other types of models, please refer to this
## Before you begin
Please make sure that you have the latest version of TensorFlow
installed and add the models folder to your Python path.
### ImageNet preparation
......@@ -70,6 +69,7 @@ available GPUs at each host.
To download the data and run the MNIST sample model locally for the first time,
run one of the following commands:
<details>
```bash
python3 mnist_main.py \
--model_dir=$MODEL_DIR \
......@@ -79,9 +79,11 @@ python3 mnist_main.py \
--num_gpus=$NUM_GPUS \
--download
```
</details>
To train the model on a Cloud TPU, run the following command:
<details>
```bash
python3 mnist_main.py \
--tpu=$TPU_NAME \
......@@ -91,10 +93,10 @@ python3 mnist_main.py \
--distribution_strategy=tpu \
--download
```
</details>
Note: the `--download` flag is only required the first time you run the model.
## Classifier Trainer
The classifier trainer is a unified framework for running image classification
models using Keras's compile/fit methods. Experiments should be provided in the
......@@ -111,6 +113,8 @@ be 64 * 8 = 512, and for a v3-32, the global batch size is 64 * 32 = 2048.
### ResNet50
#### On GPU:
<details>
```bash
python3 classifier_trainer.py \
--mode=train_and_eval \
......@@ -121,12 +125,15 @@ python3 classifier_trainer.py \
--config_file=configs/examples/resnet/imagenet/gpu.yaml \
--params_override='runtime.num_gpus=$NUM_GPUS'
```
</details>
To train on multiple hosts, each with GPUs attached, using
[MultiWorkerMirroredStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy),
please update the `runtime` section in gpu.yaml
(or override it using `--params_override`) with:
<details>
```YAML
# gpu.yaml
runtime:
......@@ -135,12 +142,16 @@ runtime:
num_gpus: $NUM_GPUS
task_index: 0
```
</details>
Set `task_index: 0` on the first host, `task_index: 1` on the second, and so
on. `$HOST1` and `$HOST2` are the IP addresses of the hosts, and `port` can be
any free port on the hosts. Only the first host will write TensorBoard
summaries and save checkpoints.
#### On TPU:
<details>
```bash
python3 classifier_trainer.py \
--mode=train_and_eval \
......@@ -152,9 +163,13 @@ python3 classifier_trainer.py \
--config_file=configs/examples/resnet/imagenet/tpu.yaml
```
</details>
### VGG-16
#### On GPU:
<details>
```bash
python3 classifier_trainer.py \
--mode=train_and_eval \
......@@ -166,9 +181,13 @@ python3 classifier_trainer.py \
--params_override='runtime.num_gpus=$NUM_GPUS'
```
</details>
### EfficientNet
**Note: EfficientNet development is a work in progress.**
#### On GPU:
<details>
```bash
python3 classifier_trainer.py \
--mode=train_and_eval \
......@@ -180,8 +199,11 @@ python3 classifier_trainer.py \
--params_override='runtime.num_gpus=$NUM_GPUS'
```
</details>
#### On TPU:
<details>
```bash
python3 classifier_trainer.py \
--mode=train_and_eval \
......@@ -192,6 +214,7 @@ python3 classifier_trainer.py \
--data_dir=$DATA_DIR \
--config_file=configs/examples/efficientnet/imagenet/efficientnet-b0-tpu.yaml
```
</details>
Note that the number of GPU devices can be overridden on the command line
using `--params_override`. The TPU does not need this override as the device
is fixed
......
......@@ -19,6 +19,7 @@ import dataclasses
from official.core import config_definitions as cfg
from official.modeling import hyperparams
from official.modeling.privacy import configs as dp_configs
@dataclasses.dataclass
......@@ -35,6 +36,8 @@ class MultiTaskConfig(hyperparams.Config):
init_checkpoint: str = ""
model: hyperparams.Config = None
task_routines: Tuple[TaskRoutine, ...] = ()
differential_privacy_config: Optional[
dp_configs.DifferentialPrivacyConfig] = None
@dataclasses.dataclass
......
......@@ -13,10 +13,12 @@
# limitations under the License.
"""Configs for differential privacy."""
import dataclasses
from official.modeling.hyperparams import base_config
@dataclasses.dataclass
class DifferentialPrivacyConfig(base_config.Config):
# Applied to the gradients
# Setting to a large number so nothing is clipped.
......
......@@ -29,10 +29,10 @@ class ConfigsTest(tf.test.TestCase):
self.assertEqual(0.0, noise_multiplier)
def test_config(self):
dp_config = configs.DifferentialPrivacyConfig(
clipping_norm=1.0,
noise_multiplier=1.0,
)
self.assertEqual(1.0, dp_config.clipping_norm)
self.assertEqual(1.0, dp_config.noise_multiplier)
......
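For background on what the two fields control: in DP-SGD, each per-example gradient is clipped to `clipping_norm` and Gaussian noise with standard deviation `noise_multiplier * clipping_norm` is added to the clipped sum. A hedged sketch of that arithmetic, not the Model Garden implementation:

```python
import tensorflow as tf

def dp_mean_gradient(per_example_grads, clipping_norm, noise_multiplier):
  """Clips each per-example gradient, sums them, adds noise, and averages."""
  clipped = [tf.clip_by_norm(g, clipping_norm) for g in per_example_grads]
  total = tf.add_n(clipped)
  noise = tf.random.normal(
      tf.shape(total), stddev=noise_multiplier * clipping_norm)
  return (total + noise) / len(per_example_grads)
```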
This directory contains binaries and utils required for input preprocessing,
tokenization, etc., that can be used with the model building blocks available
in the NLP modeling library
[nlp/modeling](https://github.com/tensorflow/models/tree/master/official/nlp/modeling)
to train custom models and validate new research ideas.
This directory contains guides to help users train NLP models.
1. The [training guide](train.md) explains the steps to follow for training
NLP models on GPU and TPU.
2. The [pretrained models guide](pretrained_models.md) explains how to load
pre-trained NLP models (baselines and checkpoints) that can be fine-tuned
further depending on the application.
3. The [TF-Hub guide](tfhub.md) explains how to use TF-NLP's
[export_tfhub](https://github.com/tensorflow/models/blob/master/official/nlp/tools/export_tfhub.py)
tool to export pre-trained Transformer encoders to SavedModels that are
suitable for publication on TF Hub.
# Model Garden NLP Common Training Driver
[train.py](https://github.com/tensorflow/models/blob/master/official/nlp/train.py)
is the common training driver that supports multiple
NLP tasks (e.g., pre-training, GLUE and SQuAD fine-tuning) and multiple
models (e.g., BERT, ALBERT, and MobileBERT).
## Experiment Configuration
[train.py](https://github.com/tensorflow/models/blob/master/official/nlp/train.py)
is driven by configs defined by the [ExperimentConfig](https://github.com/tensorflow/models/blob/master/official/core/config_definitions.py)
including configurations for `task`, `trainer` and `runtime`. The pre-defined
NLP-related [ExperimentConfig](https://github.com/tensorflow/models/blob/master/official/core/config_definitions.py) definitions can be found in
[configs/experiment_configs.py](https://github.com/tensorflow/models/blob/master/official/nlp/configs/experiment_configs.py).
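In practice, an experiment is selected by name from the factory and then overridden; a short sketch, treating the experiment name as a placeholder (`exp_factory.get_exp_config` and `Config.override` are the registry and override entry points under `official/core` and `official/modeling/hyperparams`):

```python
from official.core import exp_factory

# 'bert/sentence_prediction' is a placeholder experiment name.
params = exp_factory.get_exp_config('bert/sentence_prediction')
params.override({'trainer': {'train_steps': 10000}})
```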
......@@ -78,7 +80,9 @@ setting `task.validation_data.input_path` in `PARAMS`.
## Run on Cloud TPUs
Next, we will describe how to run
the [train.py](https://github.com/tensorflow/models/blob/master/official/nlp/train.py)
on Cloud TPUs.
### Setup
First, you need to create a `tf-nightly` TPU with
......@@ -99,6 +103,8 @@ pip3 install --user -r official/requirements.txt
### Fine-tuning Sentence Classification with BERT from TF-Hub
<details>
This example fine-tunes BERT-base from TF-Hub on the Multi-Genre Natural
Language Inference (MultiNLI) corpus using TPUs.
......@@ -163,8 +169,12 @@ python3 train.py \
You can monitor the training progress in the console and find the output
models in `$OUTPUT_DIR`.
</details>
### Fine-tuning SQuAD with a pre-trained BERT checkpoint
<details>
This example fine-tunes a pre-trained BERT checkpoint on the
Stanford Question Answering Dataset (SQuAD) using TPUs.
The [SQuAD website](https://rajpurkar.github.io/SQuAD-explorer/) contains
......@@ -219,4 +229,6 @@ python3 train.py \
```
</details>
Note: More examples about pre-training will come soon.
......@@ -20,8 +20,7 @@ examples.
* [`losses`](losses) contains common loss computation used in NLP tasks.
Please see the colab
[nlp_modeling_library_intro.ipynb](https://colab.sandbox.google.com/github/tensorflow/models/blob/master/official/colab/nlp/nlp_modeling_library_intro.ipynb)
for how to build Transformer-based NLP models using the above primitives.
Besides the pre-defined primitives, it also provides scaffold classes to allow
......@@ -44,8 +43,7 @@ custom hidden layer (which will replace the Transformer instantiation in the
encoder).
Please see the colab
[customize_encoder.ipynb](https://colab.sandbox.google.com/github/tensorflow/models/blob/master/official/colab/nlp/customize_encoder.ipynb)
for how to use scaffold classes to build novel architectures.
BERT and ALBERT models in this repo are implemented using this library.
......
......@@ -19,6 +19,7 @@ import tempfile
import numpy as np
import tensorflow as tf
from tensorflow import estimator as tf_estimator
from sentencepiece import SentencePieceTrainer
from official.nlp.modeling.layers import text_layers
......@@ -120,10 +121,10 @@ class BertTokenizerTest(tf.test.TestCase):
def model_fn(features, labels, mode):
del labels # Unused.
return tf_estimator.EstimatorSpec(mode=mode,
predictions=features["input_word_ids"])
estimator = tf_estimator.Estimator(model_fn=model_fn)
outputs = list(estimator.predict(input_fn))
self.assertAllEqual(outputs, np.array([[2, 6, 3, 0],
[2, 4, 5, 3]]))
......@@ -231,10 +232,10 @@ class SentencepieceTokenizerTest(tf.test.TestCase):
def model_fn(features, labels, mode):
del labels # Unused.
return tf_estimator.EstimatorSpec(mode=mode,
predictions=features["input_word_ids"])
estimator = tf_estimator.Estimator(model_fn=model_fn)
outputs = list(estimator.predict(input_fn))
self.assertAllEqual(outputs, np.array([[2, 8, 3, 0],
[2, 12, 3, 0]]))
......@@ -537,10 +538,10 @@ class FastWordPieceBertTokenizerTest(tf.test.TestCase):
def model_fn(features, labels, mode):
del labels # Unused.
return tf_estimator.EstimatorSpec(mode=mode,
predictions=features["input_word_ids"])
estimator = tf_estimator.Estimator(model_fn=model_fn)
outputs = list(estimator.predict(input_fn))
self.assertAllEqual(outputs, np.array([[2, 6, 3, 0],
[2, 4, 5, 3]]))
......
......@@ -20,6 +20,7 @@ import tempfile
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from tensorflow import estimator as tf_estimator
import tensorflow_hub as hub
import tensorflow_text as text
......@@ -1024,10 +1025,10 @@ class ExportPreprocessingTest(tf.test.TestCase, parameterized.TestCase):
def model_fn(features, labels, mode):
del labels # Unused.
return tf_estimator.EstimatorSpec(
mode=mode, predictions=features["input_word_ids"])
estimator = tf_estimator.Estimator(model_fn=model_fn)
outputs = list(estimator.predict(input_fn))
self.assertAllEqual(outputs, np.array([[2, 6, 3, 0], [2, 4, 5, 3]]))
......
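The same substitution recurs across the test hunks above and below: the estimator API is referenced through one aliased import rather than repeated `tf.estimator` attribute lookups, keeping the dependency to a single explicit import site. A minimal sketch of the pattern:

```python
import tensorflow as tf
from tensorflow import estimator as tf_estimator  # single aliased import

def model_fn(features, labels, mode):
  del labels  # Unused.
  # Echo one feature back as the prediction, as the tests above do.
  return tf_estimator.EstimatorSpec(
      mode=mode, predictions=features['input_word_ids'])

estimator = tf_estimator.Estimator(model_fn=model_fn)
```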
......@@ -18,12 +18,12 @@ task:
one_hot: true
label_smoothing: 0.1
train_data:
input_path: 'imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 1024
dtype: 'float32'
validation_data:
input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 1024
dtype: 'float32'
......
......@@ -15,12 +15,12 @@ task:
one_hot: true
label_smoothing: 0.1
train_data:
input_path: 'imagenet-2012-tfrecord/train*'
is_training: true
global_batch_size: 1024
dtype: 'float32'
validation_data:
input_path: 'imagenet-2012-tfrecord/valid*'
is_training: false
global_batch_size: 1024
dtype: 'float32'
......
......@@ -28,16 +28,25 @@ from official.vision.tasks import image_classification
class ImageClassificationTask(image_classification.ImageClassificationTask):
"""A task for image classification with pruning."""
_BLOCK_LAYER_SUFFIX_MAP = {
nn_blocks.BottleneckBlock: (
'conv2d/kernel:0',
'conv2d_1/kernel:0',
'conv2d_2/kernel:0',
'conv2d_3/kernel:0',
),
mobilenet.Conv2DBNBlock: ('conv2d/kernel:0',),
nn_blocks.InvertedBottleneckBlock: (
'conv2d/kernel:0',
'conv2d_1/kernel:0',
'conv2d_2/kernel:0',
'conv2d_3/kernel:0',
'depthwise_conv2d/depthwise_kernel:0',
),
nn_blocks.ResidualBlock: (
'conv2d/kernel:0',
'conv2d_1/kernel:0',
'conv2d_2/kernel:0',
),
}
def build_model(self) -> tf.keras.Model:
......
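One way such a block-to-weight-suffix map is typically consumed is to walk a built model and collect the prunable kernels for each registered block type; the helper below is an illustrative sketch, not the Model Garden API:

```python
def collect_prunable_weights(model, block_layer_suffix_map):
  """Returns weights whose names match the suffixes for their layer type."""
  weights = []
  for layer in model.submodules:
    suffixes = block_layer_suffix_map.get(type(layer))
    if not suffixes:
      continue
    weights.extend(
        w for w in layer.weights
        if any(w.name.endswith(s) for s in suffixes))
  return weights
```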
......@@ -173,6 +173,22 @@ class Encoder(tf.keras.layers.Layer):
x = self._norm(x)
return x
def get_config(self):
config = {
'num_layers': self._num_layers,
'mlp_dim': self._mlp_dim,
'num_heads': self._num_heads,
'dropout_rate': self._dropout_rate,
'attention_dropout_rate': self._attention_dropout_rate,
'kernel_regularizer': self._kernel_regularizer,
'inputs_positions': self._inputs_positions,
'init_stochastic_depth_rate': self._init_stochastic_depth_rate,
'kernel_initializer': self._kernel_initializer,
'add_pos_embed': self._add_pos_embed,
}
base_config = super().get_config()
base_config.update(config)
return base_config
class VisionTransformer(tf.keras.Model):
"""Class to build VisionTransformer family model."""
......
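With `get_config` returning the merged dictionary, the encoder supports the usual Keras serialization round trip; a minimal sketch, assuming the constructor accepts the same keyword arguments that `get_config` emits:

```python
encoder = Encoder(num_layers=12, mlp_dim=3072, num_heads=12)
restored = Encoder.from_config(encoder.get_config())
```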
......@@ -37,6 +37,7 @@ import sys
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from tensorflow import estimator as tf_estimator
from typing import Any, Dict, Text
from official.recommendation import constants as rconst
......@@ -85,7 +86,7 @@ def neumf_model_fn(features, labels, mode, params):
# Softmax with the first column of zeros is equivalent to sigmoid.
softmax_logits = ncf_common.convert_to_softmax_logits(logits)
if mode == tf_estimator.ModeKeys.EVAL:
duplicate_mask = tf.cast(features[rconst.DUPLICATE_MASK], tf.float32)
return _get_estimator_spec_with_metrics(
logits,
......@@ -95,7 +96,7 @@ def neumf_model_fn(features, labels, mode, params):
params["match_mlperf"],
use_tpu_spec=params["use_tpu"])
elif mode == tf_estimator.ModeKeys.TRAIN:
labels = tf.cast(labels, tf.int32)
valid_pt_mask = features[rconst.VALID_POINT_MASK]
......@@ -124,7 +125,7 @@ def neumf_model_fn(features, labels, mode, params):
update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
train_op = tf.group(minimize_op, update_ops)
return tf_estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
else:
raise NotImplementedError
......@@ -260,13 +261,13 @@ def _get_estimator_spec_with_metrics(logits: tf.Tensor,
match_mlperf)
if use_tpu_spec:
return tf_estimator.tpu.TPUEstimatorSpec(
mode=tf_estimator.ModeKeys.EVAL,
loss=cross_entropy,
eval_metrics=(metric_fn, [in_top_k, ndcg, metric_weights]))
return tf_estimator.EstimatorSpec(
mode=tf_estimator.ModeKeys.EVAL,
loss=cross_entropy,
eval_metric_ops=metric_fn(in_top_k, ndcg, metric_weights))
......
# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.