"tools/git@developer.sourcefind.cn:OpenDAS/megatron-lm.git" did not exist on "edca71da3733587377e6ad5c54125f3968466947"
Unverified commit 440e0eec, authored by Stephen Wu and committed via GitHub

Merge branch 'master' into RTESuperGLUE

parents 51364cdf 9815ea67
@@ -14,17 +14,6 @@ can take full advantage of TensorFlow for their research and product development
## [Announcements](https://github.com/tensorflow/models/wiki/Announcements)
| Date | News |
|------|------|
| July 10, 2020 | TensorFlow 2 meets the [Object Detection API](https://github.com/tensorflow/models/tree/master/research/object_detection) ([Blog](https://blog.tensorflow.org/2020/07/tensorflow-2-meets-object-detection-api.html)) |
| June 30, 2020 | [SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization](https://github.com/tensorflow/models/tree/master/official/vision/detection#train-a-spinenet-49-based-mask-r-cnn) released ([Tweet](https://twitter.com/GoogleAI/status/1278016712978264064)) |
| June 17, 2020 | [Context R-CNN: Long Term Temporal Context for Per-Camera Object Detection](https://github.com/tensorflow/models/tree/master/research/object_detection#june-17th-2020) released ([Tweet](https://twitter.com/GoogleAI/status/1276571419422253057)) |
| May 21, 2020 | [Unifying Deep Local and Global Features for Image Search (DELG)](https://github.com/tensorflow/models/tree/master/research/delf#delg) code released |
| May 19, 2020 | [MobileDets: Searching for Object Detection Architectures for Mobile Accelerators](https://github.com/tensorflow/models/tree/master/research/object_detection#may-19th-2020) released |
| May 7, 2020 | [MnasFPN with MobileNet-V2 backbone](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md#mobile-models) released for object detection |
| May 1, 2020 | [DELF: DEep Local Features](https://github.com/tensorflow/models/tree/master/research/delf) updated to support TensorFlow 2.1 |
| March 31, 2020 | [Introducing the Model Garden for TensorFlow 2](https://blog.tensorflow.org/2020/03/introducing-model-garden-for-tensorflow-2.html) ([Tweet](https://twitter.com/TensorFlow/status/1245029834633297921)) |
## Contributions
[![help wanted:paper implementation](https://img.shields.io/github/issues/tensorflow/models/help%20wanted%3Apaper%20implementation)](https://github.com/tensorflow/models/labels/help%20wanted%3Apaper%20implementation)
......
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......
@@ -26,6 +26,8 @@ import tensorflow as tf
from official.core import base_task
from official.core import config_definitions
from official.modeling import optimization
ExperimentConfig = config_definitions.ExperimentConfig
TrainerConfig = config_definitions.TrainerConfig
@@ -119,6 +121,10 @@ class Trainer(orbit.StandardTrainer, orbit.StandardEvaluator):
self._checkpoint_exporter = checkpoint_exporter
self._recovery = None
# Creates a shadow copy of the weights to store weights moving average.
if isinstance(self._optimizer, optimization.ExponentialMovingAverage):
self._optimizer.shadow_copy(self._model)
# global_step increases by 1 after each training iteration.
# We should have global_step.numpy() == self.optimizer.iterations.numpy()
# when there is only 1 optimizer.
@@ -209,7 +215,10 @@ class Trainer(orbit.StandardTrainer, orbit.StandardEvaluator):
@property
def optimizer(self):
if hasattr(self, "_optimizer"):
return self._optimizer
else:
return None
@property
def global_step(self):
@@ -294,6 +303,10 @@ class Trainer(orbit.StandardTrainer, orbit.StandardEvaluator):
"""Sets up metrics."""
for metric in self.validation_metrics + [self.validation_loss]:
metric.reset_states()
# Swaps weights to test on weights moving average.
if self.optimizer and isinstance(
self.optimizer, optimization.ExponentialMovingAverage):
self.optimizer.swap_weights()
def eval_step(self, iterator):
"""See base class."""
@@ -331,6 +344,12 @@ class Trainer(orbit.StandardTrainer, orbit.StandardEvaluator):
logs["best_" +
metric_name] = self._checkpoint_exporter.best_ckpt_logs[metric_name]
# Swaps back weights after testing when EMA is used.
# This happens after best checkpoint export so that average weights used for
# eval are exported instead of regular weights.
if self.optimizer and isinstance(
self.optimizer, optimization.ExponentialMovingAverage):
self.optimizer.swap_weights()
return logs
def eval_reduce(self, state=None, step_outputs=None):
......
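The EMA handling above (a `shadow_copy` at construction time, `swap_weights` before and after evaluation) can be pictured with a small standalone sketch. This is illustrative only and is not the `optimization.ExponentialMovingAverage` implementation; the class name and method bodies below are assumptions made for exposition.

```python
import tensorflow as tf

class EmaSketch:
  """Minimal EMA-style weight averaging with a swap, for illustration only."""

  def __init__(self, decay=0.999):
    self._decay = decay
    self._model_vars = None
    self._shadow = None

  def shadow_copy(self, model):
    # One non-trainable shadow variable per model variable.
    self._model_vars = model.variables
    self._shadow = [tf.Variable(v, trainable=False) for v in self._model_vars]

  def update(self):
    # After each train step: shadow <- decay * shadow + (1 - decay) * weights.
    for s, v in zip(self._shadow, self._model_vars):
      s.assign(self._decay * s + (1.0 - self._decay) * v)

  def swap_weights(self):
    # Exchange live weights and averaged weights; calling this twice restores
    # the originals, which is why the trainer swaps before eval and swaps back
    # after the best checkpoint has been exported.
    for s, v in zip(self._shadow, self._model_vars):
      tmp = tf.identity(v)
      v.assign(s)
      s.assign(tmp)
```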
@@ -32,6 +32,7 @@ from official.core import base_task
from official.core import base_trainer as trainer_lib
from official.core import config_definitions
from official.modeling.progressive import policies
from official.modeling.progressive import utils
ExperimentConfig = config_definitions.ExperimentConfig
@@ -61,26 +62,6 @@ class ProgressiveTrainerConfig(config_definitions.TrainerConfig):
export_only_final_stage_ckpt: bool = True
class CheckpointWithHooks(tf.train.Checkpoint):
"""Same as tf.train.Checkpoint but supports hooks.
When running continuous_eval jobs, when a new checkpoint arrives, we have to
update our model and optimizer etc. to match the stage_id of the checkpoint.
However, when orbit loads a checkpoint, it does not inform us. So we use this
class to update our model to the correct stage before checkpoint restore.
"""
def __init__(self, before_load_hook, **kwargs):
self._before_load_hook = before_load_hook
super(CheckpointWithHooks, self).__init__(**kwargs)
# override
def read(self, save_path, options=None):
self._before_load_hook(save_path)
logging.info('Ran before_load_hook.')
super(CheckpointWithHooks, self).read(save_path=save_path, options=options)
@gin.configurable
class ProgressiveTrainer(trainer_lib.Trainer):
"""Implements the progressive trainer shared for TensorFlow models."""
@@ -124,7 +105,7 @@ class ProgressiveTrainer(trainer_lib.Trainer):
self._global_step = orbit.utils.create_global_step()
self._checkpoint = utils.CheckpointWithHooks(
before_load_hook=self._update_pt_stage_from_ckpt,
global_step=self.global_step,
**self._task.cur_checkpoint_items)
......
@@ -14,6 +14,9 @@
"""Util classes and functions."""
from absl import logging
import tensorflow as tf
# pylint: disable=g-direct-tensorflow-import
from tensorflow.python.training.tracking import tracking
@@ -29,3 +32,25 @@ class VolatileTrackable(tracking.AutoTrackable):
for k, v in kwargs.items():
delattr(self, k)  # untrack this object
setattr(self, k, v)  # track the new object
class CheckpointWithHooks(tf.train.Checkpoint):
"""Same as tf.train.Checkpoint but supports hooks.
In progressive training, use this class instead of tf.train.Checkpoint.
Because the network architecture changes between stages, the model has to be
prepared (for example, switched to the architecture that matches the
checkpoint) before the checkpoint is loaded. This class supports a hook that
is executed before checkpoint loading.
"""
def __init__(self, before_load_hook, **kwargs):
self._before_load_hook = before_load_hook
super(CheckpointWithHooks, self).__init__(**kwargs)
# override
def read(self, save_path, options=None):
self._before_load_hook(save_path)
logging.info('Ran before_load_hook.')
super(CheckpointWithHooks, self).read(save_path=save_path, options=options)
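A minimal usage sketch of `CheckpointWithHooks` follows. The hook body, model, and output path are placeholders, not part of the library; in real progressive training the hook is where the model is rebuilt to match the stage stored in the checkpoint.

```python
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(4)])
model(tf.zeros([1, 4]))  # Create the variables before checkpointing.

def switch_to_matching_stage(save_path):
  # Placeholder hook: prepare/rebuild the model before variables are restored.
  print('Preparing model before restoring from', save_path)

ckpt = CheckpointWithHooks(before_load_hook=switch_to_matching_stage, model=model)
path = ckpt.write('/tmp/ckpt_with_hooks_demo')
ckpt.read(path)  # Runs the hook first, then restores variables as usual.
```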
@@ -17,8 +17,8 @@
import collections
import csv
import importlib
import json
import os
from absl import logging
import tensorflow as tf
@@ -1285,6 +1285,7 @@ def convert_single_example(ex_index, example, label_list, max_seq_length,
return feature
class AXgProcessor(DataProcessor):
"""Processor for the AXg dataset (SuperGLUE diagnostics dataset)."""
......
@@ -244,7 +244,6 @@ def generate_classifier_dataset():
classifier_data_lib.AXgProcessor,
"rte-superglue":
classifier_data_lib.RTESuperGLUEProcessor
}
task_name = FLAGS.classification_task_name.lower()
if task_name not in processors:
......
@@ -61,8 +61,7 @@ class BertPretrainDataLoader(data_loader.DataLoader):
self._use_next_sentence_label = params.use_next_sentence_label
self._use_position_id = params.use_position_id
def _name_to_features(self):
name_to_features = {
'input_mask':
tf.io.FixedLenFeature([self._seq_length], tf.int64),
@@ -89,7 +88,11 @@ class BertPretrainDataLoader(data_loader.DataLoader):
if self._use_position_id:
name_to_features['position_ids'] = tf.io.FixedLenFeature(
[self._seq_length], tf.int64)
return name_to_features
def _decode(self, record: tf.Tensor):
"""Decodes a serialized tf.Example."""
name_to_features = self._name_to_features()
example = tf.io.parse_single_example(record, name_to_features)
# tf.Example only supports tf.int64, but the TPU only supports tf.int32.
......
@@ -14,12 +14,16 @@
# limitations under the License.
# ==============================================================================
"""Loads dataset for the sentence prediction (classification) task."""
from typing import List, Mapping, Optional
import dataclasses
import tensorflow as tf
import tensorflow_hub as hub
from official.common import dataset_fn
from official.core import config_definitions as cfg
from official.core import input_reader
from official.nlp import modeling
from official.nlp.data import data_loader
from official.nlp.data import data_loader_factory
@@ -89,3 +93,152 @@ class SentencePredictionDataLoader(data_loader.DataLoader):
reader = input_reader.InputReader(
params=self._params, decoder_fn=self._decode, parser_fn=self._parse)
return reader.read(input_context)
@dataclasses.dataclass
class SentencePredictionTextDataConfig(cfg.DataConfig):
"""Data config for sentence prediction task with raw text."""
# Either set `input_path`...
input_path: str = ''
# ...or `tfds_name` and `tfds_split` to specify input.
tfds_name: str = ''
tfds_split: str = ''
# Either `int` or `float`.
label_type: str = 'int'
# The name of the text feature fields. The text features will be
# concatenated in order.
text_fields: Optional[List[str]] = None
label_field: str = 'label'
global_batch_size: int = 32
seq_length: int = 128
is_training: bool = True
# Either build preprocessing with Python code by specifying these values
# for modeling.layers.BertTokenizer()/SentencepieceTokenizer()....
tokenization: str = 'WordPiece' # WordPiece or SentencePiece
# Text vocab file if tokenization is WordPiece, or sentencepiece.ModelProto
# file if tokenization is SentencePiece.
vocab_file: str = ''
lower_case: bool = True
# ...or load preprocessing from a SavedModel at this location.
preprocessing_hub_module_url: str = ''
# Either tfrecord, sstable, or recordio.
file_type: str = 'tfrecord'
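For concreteness, the two preprocessing modes above could be configured as follows; all file paths are placeholders, and the TF Hub URL is the one used in the tests further down.

```python
# WordPiece preprocessing built in Python from a vocab file.
wordpiece_cfg = SentencePredictionTextDataConfig(
    input_path='/path/to/train.tf_record',
    text_fields=['sentence1', 'sentence2'],
    vocab_file='/path/to/vocab.txt',
    lower_case=True)

# Preprocessing loaded from a TF Hub SavedModel instead, reading from TFDS.
hub_cfg = SentencePredictionTextDataConfig(
    tfds_name='glue/mrpc',
    tfds_split='train',
    text_fields=['sentence1', 'sentence2'],
    preprocessing_hub_module_url=(
        'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'))
```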
class TextProcessor(tf.Module):
"""Text features processing for sentence prediction task."""
def __init__(self,
seq_length: int,
vocab_file: Optional[str] = None,
tokenization: Optional[str] = None,
lower_case: Optional[bool] = True,
preprocessing_hub_module_url: Optional[str] = None):
if preprocessing_hub_module_url:
self._preprocessing_hub_module = hub.load(preprocessing_hub_module_url)
self._tokenizer = self._preprocessing_hub_module.tokenize
def set_shape(t):
# Before TF2.4, the sequence length dimension loaded from the
# preprocessing hub module is None, so we recover the shape here.
# TODO(b/157636658): Remove once TF2.4 is released and being used.
t.set_shape([None, seq_length])
return t
def pack_inputs_fn(inputs):
result = self._preprocessing_hub_module.bert_pack_inputs(
inputs, seq_length=seq_length)
result = tf.nest.map_structure(set_shape, result)
return result
self._pack_inputs = pack_inputs_fn
return
if tokenization == 'WordPiece':
self._tokenizer = modeling.layers.BertTokenizer(
vocab_file=vocab_file, lower_case=lower_case)
elif tokenization == 'SentencePiece':
self._tokenizer = modeling.layers.SentencepieceTokenizer(
model_file_path=vocab_file, lower_case=lower_case,
strip_diacritics=True) # Strip diacritics to follow ALBERT model
else:
raise ValueError('Unsupported tokenization: %s' % tokenization)
self._pack_inputs = modeling.layers.BertPackInputs(
seq_length=seq_length,
special_tokens_dict=self._tokenizer.get_special_tokens_dict())
def __call__(self, segments):
segments = [self._tokenizer(s) for s in segments]
# BertTokenizer returns a RaggedTensor with shape [batch, word, subword],
# and SentencepieceTokenizer returns a RaggedTensor with shape
# [batch, sentencepiece],
segments = [
tf.cast(x.merge_dims(1, -1) if x.shape.rank > 2 else x, tf.int32)
for x in segments
]
return self._pack_inputs(segments)
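The `merge_dims(1, -1)` step above can be illustrated with a tiny standalone example that uses a ragged tensor directly (no tokenizer involved): a `[batch, word, subword]` RaggedTensor is flattened to `[batch, subword]` before packing.

```python
import tensorflow as tf

tokens = tf.ragged.constant([[[101, 102], [103]], [[104]]])  # [batch, word, subword]
print(tokens.shape.rank)           # 3 (WordPiece-style output)
flat = tokens.merge_dims(1, -1)    # -> [batch, subword]
print(flat.to_list())              # [[101, 102, 103], [104]]
```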
@data_loader_factory.register_data_loader_cls(SentencePredictionTextDataConfig)
class SentencePredictionTextDataLoader(data_loader.DataLoader):
"""Loads dataset with raw text for sentence prediction task."""
def __init__(self, params):
if bool(params.tfds_name) != bool(params.tfds_split):
raise ValueError('`tfds_name` and `tfds_split` should be specified or '
'unspecified at the same time.')
if bool(params.tfds_name) == bool(params.input_path):
raise ValueError('Must specify either `tfds_name` and `tfds_split` '
'or `input_path`.')
if not params.text_fields:
raise ValueError('Unexpected empty text fields.')
if bool(params.vocab_file) == bool(params.preprocessing_hub_module_url):
raise ValueError('Must specify exactly one of vocab_file (with matching '
'lower_case flag) or preprocessing_hub_module_url.')
self._params = params
self._text_fields = params.text_fields
self._label_field = params.label_field
self._label_type = params.label_type
self._text_processor = TextProcessor(
seq_length=params.seq_length,
vocab_file=params.vocab_file,
tokenization=params.tokenization,
lower_case=params.lower_case,
preprocessing_hub_module_url=params.preprocessing_hub_module_url)
def _bert_preprocess(self, record: Mapping[str, tf.Tensor]):
"""Berts preprocess."""
segments = [record[x] for x in self._text_fields]
model_inputs = self._text_processor(segments)
y = record[self._label_field]
return model_inputs, y
def _decode(self, record: tf.Tensor):
"""Decodes a serialized tf.Example."""
name_to_features = {}
for text_field in self._text_fields:
name_to_features[text_field] = tf.io.FixedLenFeature([], tf.string)
label_type = LABEL_TYPES_MAP[self._label_type]
name_to_features[self._label_field] = tf.io.FixedLenFeature([], label_type)
example = tf.io.parse_single_example(record, name_to_features)
# tf.Example only supports tf.int64, but the TPU only supports tf.int32.
# So cast all int64 to int32.
for name in example:
t = example[name]
if t.dtype == tf.int64:
t = tf.cast(t, tf.int32)
example[name] = t
return example
def load(self, input_context: Optional[tf.distribute.InputContext] = None):
"""Returns a tf.dataset.Dataset."""
reader = input_reader.InputReader(
dataset_fn=dataset_fn.pick_dataset_fn(self._params.file_type),
decoder_fn=self._decode if self._params.input_path else None,
params=self._params,
postprocess_fn=self._bert_preprocess)
return reader.read(input_context)
@@ -20,10 +20,11 @@ from absl.testing import parameterized
import numpy as np
import tensorflow as tf
from sentencepiece import SentencePieceTrainer
from official.nlp.data import sentence_prediction_dataloader as loader
def _create_fake_preprocessed_dataset(output_path, seq_length, label_type):
"""Creates a fake dataset."""
writer = tf.io.TFRecordWriter(output_path)
@@ -54,6 +55,70 @@ def _create_fake_dataset(output_path, seq_length, label_type):
writer.close()
def _create_fake_raw_dataset(output_path, text_fields, label_type):
"""Creates a fake tf record file."""
writer = tf.io.TFRecordWriter(output_path)
def create_str_feature(value):
f = tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
return f
def create_int_feature(values):
f = tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))
return f
def create_float_feature(values):
f = tf.train.Feature(float_list=tf.train.FloatList(value=list(values)))
return f
for _ in range(100):
features = {}
for text_field in text_fields:
features[text_field] = create_str_feature([b'hello world'])
if label_type == 'int':
features['label'] = create_int_feature([0])
elif label_type == 'float':
features['label'] = create_float_feature([0.5])
else:
raise ValueError('Unexpected label_type: %s' % label_type)
tf_example = tf.train.Example(features=tf.train.Features(feature=features))
writer.write(tf_example.SerializeToString())
writer.close()
def _create_fake_sentencepiece_model(output_dir):
vocab = ['a', 'b', 'c', 'd', 'e', 'abc', 'def', 'ABC', 'DEF']
model_prefix = os.path.join(output_dir, 'spm_model')
input_text_file_path = os.path.join(output_dir, 'train_input.txt')
with tf.io.gfile.GFile(input_text_file_path, 'w') as f:
f.write(' '.join(vocab + ['\n']))
# Add 7 more tokens: <pad>, <unk>, [CLS], [SEP], [MASK], <s>, </s>.
full_vocab_size = len(vocab) + 7
flags = dict(
model_prefix=model_prefix,
model_type='word',
input=input_text_file_path,
pad_id=0,
unk_id=1,
control_symbols='[CLS],[SEP],[MASK]',
vocab_size=full_vocab_size,
bos_id=full_vocab_size - 2,
eos_id=full_vocab_size - 1)
SentencePieceTrainer.Train(' '.join(
['--{}={}'.format(k, v) for k, v in flags.items()]))
return model_prefix + '.model'
def _create_fake_vocab_file(vocab_file_path):
tokens = ['[PAD]']
for i in range(1, 100):
tokens.append('[unused%d]' % i)
tokens.extend(['[UNK]', '[CLS]', '[SEP]', '[MASK]', 'hello', 'world'])
with tf.io.gfile.GFile(vocab_file_path, 'w') as outfile:
outfile.write('\n'.join(tokens))
class SentencePredictionDataTest(tf.test.TestCase, parameterized.TestCase):
@parameterized.parameters(('int', tf.int32), ('float', tf.float32))
@@ -61,14 +126,13 @@ class SentencePredictionDataTest(tf.test.TestCase, parameterized.TestCase):
input_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
batch_size = 10
seq_length = 128
_create_fake_preprocessed_dataset(input_path, seq_length, label_type)
data_config = loader.SentencePredictionDataConfig(
input_path=input_path,
seq_length=seq_length,
global_batch_size=batch_size,
label_type=label_type)
dataset = loader.SentencePredictionDataLoader(data_config).load()
features, labels = next(iter(dataset))
self.assertCountEqual(['input_word_ids', 'input_mask', 'input_type_ids'],
features.keys())
@@ -79,5 +143,108 @@ class SentencePredictionDataTest(tf.test.TestCase, parameterized.TestCase):
self.assertEqual(labels.dtype, expected_label_type)
class SentencePredictionTfdsDataLoaderTest(tf.test.TestCase,
parameterized.TestCase):
@parameterized.parameters(True, False)
def test_python_wordpiece_preprocessing(self, use_tfds):
batch_size = 10
seq_length = 256 # Non-default value.
lower_case = True
tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
text_fields = ['sentence1', 'sentence2']
if not use_tfds:
_create_fake_raw_dataset(tf_record_path, text_fields, label_type='int')
vocab_file_path = os.path.join(self.get_temp_dir(), 'vocab.txt')
_create_fake_vocab_file(vocab_file_path)
data_config = loader.SentencePredictionTextDataConfig(
input_path='' if use_tfds else tf_record_path,
tfds_name='glue/mrpc' if use_tfds else '',
tfds_split='train' if use_tfds else '',
text_fields=text_fields,
global_batch_size=batch_size,
seq_length=seq_length,
is_training=True,
lower_case=lower_case,
vocab_file=vocab_file_path)
dataset = loader.SentencePredictionTextDataLoader(data_config).load()
features, labels = next(iter(dataset))
self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'],
features.keys())
self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(labels.shape, (batch_size,))
@parameterized.parameters(True, False)
def test_python_sentencepiece_preprocessing(self, use_tfds):
batch_size = 10
seq_length = 256 # Non-default value.
lower_case = True
tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
text_fields = ['sentence1', 'sentence2']
if not use_tfds:
_create_fake_raw_dataset(tf_record_path, text_fields, label_type='int')
sp_model_file_path = _create_fake_sentencepiece_model(self.get_temp_dir())
data_config = loader.SentencePredictionTextDataConfig(
input_path='' if use_tfds else tf_record_path,
tfds_name='glue/mrpc' if use_tfds else '',
tfds_split='train' if use_tfds else '',
text_fields=text_fields,
global_batch_size=batch_size,
seq_length=seq_length,
is_training=True,
lower_case=lower_case,
tokenization='SentencePiece',
vocab_file=sp_model_file_path,
)
dataset = loader.SentencePredictionTextDataLoader(data_config).load()
features, labels = next(iter(dataset))
self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'],
features.keys())
self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(labels.shape, (batch_size,))
@parameterized.parameters(True, False)
def test_saved_model_preprocessing(self, use_tfds):
batch_size = 10
seq_length = 256 # Non-default value.
tf_record_path = os.path.join(self.get_temp_dir(), 'train.tf_record')
text_fields = ['sentence1', 'sentence2']
if not use_tfds:
_create_fake_raw_dataset(tf_record_path, text_fields, label_type='float')
vocab_file_path = os.path.join(self.get_temp_dir(), 'vocab.txt')
_create_fake_vocab_file(vocab_file_path)
data_config = loader.SentencePredictionTextDataConfig(
input_path='' if use_tfds else tf_record_path,
tfds_name='glue/mrpc' if use_tfds else '',
tfds_split='train' if use_tfds else '',
text_fields=text_fields,
global_batch_size=batch_size,
seq_length=seq_length,
is_training=True,
preprocessing_hub_module_url=(
'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'),
label_type='int' if use_tfds else 'float',
)
dataset = loader.SentencePredictionTextDataLoader(data_config).load()
features, labels = next(iter(dataset))
self.assertCountEqual(['input_word_ids', 'input_type_ids', 'input_mask'],
features.keys())
self.assertEqual(features['input_word_ids'].shape, (batch_size, seq_length))
self.assertEqual(features['input_mask'].shape, (batch_size, seq_length))
self.assertEqual(features['input_type_ids'].shape, (batch_size, seq_length))
self.assertEqual(labels.shape, (batch_size,))
if __name__ == '__main__':
tf.test.main()
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Tool to generate api_docs for tensorflow_models/official library.
Example:
python build_docs \
--output_dir=/tmp/api_docs \
--project_short_name=tf_nlp.modeling \
--project_full_name="TensorFlow Official Models - NLP Modeling Library"
"""
import os
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
from tensorflow_docs.api_generator import doc_controls
from tensorflow_docs.api_generator import generate_lib
from tensorflow_docs.api_generator import public_api
from official.nlp import modeling as tf_nlp_modeling
FLAGS = flags.FLAGS
flags.DEFINE_string('output_dir', None, 'Where to write the resulting docs to.')
flags.DEFINE_string(
'code_url_prefix',
'https://github.com/tensorflow/models/blob/master/official/nlp/modeling/',
'The url prefix for links to code.')
flags.DEFINE_bool('search_hints', True,
'Include metadata search hints in the generated files')
flags.DEFINE_string('site_path', 'tf_nlp_modeling/api_docs/python',
'Path prefix in the _toc.yaml')
flags.DEFINE_bool('gen_report', False,
'Generate an API report containing the health of the '
'docstrings of the public API.')
flags.DEFINE_string(
'project_short_name', 'tf_nlp.modeling',
'The project short name referring to the python module to document.')
flags.DEFINE_string('project_full_name',
'TensorFlow Official Models - NLP Modeling Library',
'The main title for the project.')
def _hide_module_model_and_layer_methods():
"""Hide methods and properties defined in the base classes of Keras layers.
We hide all methods and properties of the base classes, except:
- `__init__` is always documented.
- `call` is always documented, as it can carry important information for
complex layers.
"""
module_contents = list(tf.Module.__dict__.items())
model_contents = list(tf.keras.Model.__dict__.items())
layer_contents = list(tf.keras.layers.Layer.__dict__.items())
for name, obj in module_contents + layer_contents + model_contents:
if name == '__init__':
# Always document __init__.
continue
if name == 'call':
# Always document `call`.
if hasattr(obj, doc_controls._FOR_SUBCLASS_IMPLEMENTERS): # pylint: disable=protected-access
delattr(obj, doc_controls._FOR_SUBCLASS_IMPLEMENTERS) # pylint: disable=protected-access
continue
# Otherwise, exclude from documentation.
if isinstance(obj, property):
obj = obj.fget
if isinstance(obj, (staticmethod, classmethod)):
obj = obj.__func__
try:
doc_controls.do_not_doc_in_subclasses(obj)
except AttributeError:
pass
def gen_api_docs(code_url_prefix, site_path, output_dir, gen_report,
project_short_name, project_full_name, search_hints):
"""Generates api docs for the tensorflow docs package."""
_hide_module_model_and_layer_methods()
doc_generator = generate_lib.DocGenerator(
root_title=project_full_name,
py_modules=[(project_short_name, tf_nlp_modeling)],
base_dir=os.path.dirname(tf_nlp_modeling.__file__),
code_url_prefix=code_url_prefix,
search_hints=search_hints,
site_path=site_path,
gen_report=gen_report,
callbacks=[public_api.explicit_package_contents_filter],
)
doc_generator.build(output_dir)
logging.info('Output docs to: %s', output_dir)
def main(argv):
if len(argv) > 1:
raise app.UsageError('Too many command-line arguments.')
gen_api_docs(
code_url_prefix=FLAGS.code_url_prefix,
site_path=FLAGS.site_path,
output_dir=FLAGS.output_dir,
gen_report=FLAGS.gen_report,
project_short_name=FLAGS.project_short_name,
project_full_name=FLAGS.project_full_name,
search_hints=FLAGS.search_hints)
if __name__ == '__main__':
flags.mark_flag_as_required('output_dir')
app.run(main)
# Lint as: python3
# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for official.tools.build_docs."""
import os
import shutil
import tensorflow as tf
from official.utils.docs import build_docs
class BuildDocsTest(tf.test.TestCase):
def setUp(self):
super(BuildDocsTest, self).setUp()
self.workdir = self.get_temp_dir()
if os.path.exists(self.workdir):
shutil.rmtree(self.workdir)
os.makedirs(self.workdir)
def test_api_gen(self):
build_docs.gen_api_docs(
code_url_prefix="http://official/nlp/modeling/",
site_path="tf_nlp_modeling/api_docs/python",
output_dir=self.workdir,
gen_report=False,
project_short_name="tf_nlp_modeling",
project_full_name="TensorFlow Modeling - NLP Library",
search_hints=True)
# Check that the "defined in" section is working
with open(os.path.join(self.workdir, "tf_nlp_modeling.md")) as f:
content = f.read()
self.assertIn("__init__.py", content)
if __name__ == "__main__":
tf.test.main()
@@ -68,3 +68,25 @@ TF Vision model garden provides a large collection of baselines and checkpoints
| backbone | resolution | epochs | FLOPs (B) | params (M) | box AP | mask AP | download |
| ------------ |:-------------:| ---------:|-----------:|--------:|--------:|-----------:|-----------:|
| SpineNet-49 | 640x640 | 350 | 215.7 | 40.8 | 42.6 | 37.9 | config |
## Video Classification
### Common Settings and Notes
* We provide models for video classification with two backbones: [SlowOnly](https://arxiv.org/abs/1812.03982) and 3D-ResNet (R3D) used in [Spatiotemporal Contrastive Video Representation Learning](https://arxiv.org/abs/2008.03800).
* Training and evaluation details:
* All models are trained from scratch with vision modality (RGB) for 200 epochs.
* We use a batch size of 1024 and cosine learning rate decay with linear warmup in the first 5 epochs.
* We follow [SlowFast](https://arxiv.org/abs/1812.03982) to perform 30-view evaluation, as sketched below.
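A minimal sketch of 30-view evaluation (10 temporal clips x 3 spatial crops per video, with predictions averaged over the views). This assumes a generic `model` callable and is not the repository's evaluation code.

```python
import tensorflow as tf

def thirty_view_predict(model, views):
  """`views`: [30, frames, height, width, 3] crops/clips from one video."""
  logits = model(views, training=False)    # [30, num_classes]
  probs = tf.nn.softmax(logits, axis=-1)
  return tf.reduce_mean(probs, axis=0)     # single averaged prediction
```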
### Kinetics-400 Action Recognition Baselines
| model | input (frame x stride) | Top-1 | Top-5 | download |
| -------- |:----------------------:|--------:|--------:|---------:|
| SlowOnly | 8 x 8 | 74.1 | 91.4 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly8x8_tpu.yaml) |
| SlowOnly | 16 x 4 | 75.6 | 92.1 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_slowonly16x4_tpu.yaml) |
| R3D-50 | 32 x 2 | 77.0 | 93.0 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k400_3d-resnet50_tpu.yaml) |
### Kinetics-600 Action Recognition Baselines
| model | input (frame x stride) | Top-1 | Top-5 | download |
| -------- |:----------------------:|--------:|--------:|---------:|
| SlowOnly | 8 x 8 | 77.3 | 93.6 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_slowonly8x8_tpu.yaml) |
| R3D-50 | 32 x 2 | 79.5 | 94.8 | [config](https://github.com/tensorflow/models/blob/master/official/vision/beta/configs/experiments/video_classification/k600_3d-resnet50_tpu.yaml) |
# ResNet-50 ImageNet classification. 78.1% top-1 and 93.9% top-5 accuracy.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
model:
num_classes: 1001
input_size: [224, 224, 3]
backbone:
type: 'resnet'
resnet:
model_id: 50
norm_activation:
activation: 'swish'
losses:
l2_weight_decay: 0.0001
one_hot: true
label_smoothing: 0.1
train_data:
input_path: ''
tfds_name: 'imagenet2012'
tfds_split: 'train'
sharding: true
is_training: true
global_batch_size: 4096
dtype: 'bfloat16'
validation_data:
input_path: ''
tfds_name: 'imagenet2012'
tfds_split: 'validation'
sharding: true
is_training: false
global_batch_size: 4096
dtype: 'bfloat16'
drop_remainder: false
trainer:
train_steps: 62400
validation_steps: 13
validation_interval: 312
steps_per_loop: 312
summary_interval: 312
checkpoint_interval: 312
optimizer_config:
optimizer:
type: 'sgd'
sgd:
momentum: 0.9
learning_rate:
type: 'cosine'
cosine:
initial_learning_rate: 1.6
decay_steps: 62400
warmup:
type: 'linear'
linear:
warmup_steps: 1560
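As a sanity check on the schedule above (assuming the standard ImageNet-1k split sizes of 1,281,167 training and 50,000 validation images), the step counts follow directly from the batch size:

```python
train_images, val_images, batch = 1_281_167, 50_000, 4096
steps_per_epoch = train_images // batch   # 312 -> steps_per_loop / interval values
print(steps_per_epoch * 200)              # 62400 train_steps (200 epochs)
print(5 * steps_per_epoch)                # 1560 warmup_steps (5 epochs of linear warmup)
print(-(-val_images // batch))            # 13 validation_steps (ceiling division)
```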
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'bfloat16'
task:
annotation_file: '' # Can't use annotation file when tfds is used.
losses:
l2_weight_decay: 0.0001
model:
num_classes: 91
max_level: 7
min_level: 3
input_size: [640, 640, 3]
norm_activation:
activation: relu
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
train_data:
tfds_name: 'coco/2017'
tfds_split: 'train'
drop_remainder: true
dtype: bfloat16
global_batch_size: 256
input_path: ''
is_training: true
shuffle_buffer_size: 1000
validation_data:
tfds_name: 'coco/2017'
tfds_split: 'validation'
drop_remainder: true
dtype: bfloat16
global_batch_size: 8
input_path: ''
is_training: false
# Use your own cityscapes preprocessed dataset. 79% meanIoU.
runtime:
distribution_strategy: 'tpu'
mixed_precision_dtype: 'float32'
task:
model:
num_classes: 19
input_size: [null, null, 3]
backbone:
type: 'dilated_resnet'
dilated_resnet:
model_id: 101
output_stride: 16
stem_type: 'v1'
se_ratio: 0.25
stochastic_depth_drop_rate: 0.2
multigrid: [1, 2, 4]
last_stage_repeats: 1
decoder:
aspp:
pool_kernel_size: [512, 1024]
head:
feature_fusion: 'deeplabv3plus'
low_level: 2
low_level_num_filters: 48
norm_activation:
activation: 'swish'
norm_epsilon: 0.001
norm_momentum: 0.99
use_sync_bn: true
losses:
top_k_percent_pixels: 1.0 # Backpropagate loss only for the hardest top-k fraction of pixels; 1.0 means all pixels.
train_data:
output_size: [512, 1024]
train_on_crops: true
input_path: ''
tfds_name: 'cityscapes/semantic_segmentation'
tfds_split: 'train'
is_training: true
global_batch_size: 16
dtype: 'float32'
aug_rand_hflip: true
aug_scale_max: 2.0
aug_scale_min: 0.5
validation_data:
output_size: [1024, 2048]
input_path: ''
tfds_name: 'cityscapes/semantic_segmentation'
tfds_split: 'validation'
is_training: false
global_batch_size: 16
dtype: 'float32'
drop_remainder: false
resize_eval_groundtruth: true
trainer:
optimizer_config:
learning_rate:
polynomial:
decay_steps: 90000
initial_learning_rate: 0.01
power: 0.9
type: polynomial
optimizer:
sgd:
momentum: 0.9
type: sgd
warmup:
linear:
name: linear
warmup_learning_rate: 0
warmup_steps: 925
type: linear
steps_per_loop: 185
summary_interval: 185
train_steps: 90000
validation_interval: 185
validation_steps: 31
checkpoint_interval: 185
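The step counts above also line up with the Cityscapes fine-annotation split sizes (assumed here to be 2,975 training and 500 validation images) at a global batch size of 16:

```python
train_images, val_images, batch = 2975, 500, 16
steps_per_epoch = train_images // batch   # 185 -> steps_per_loop / interval values
print(5 * steps_per_epoch)                # 925 linear warmup steps (5 epochs)
print(val_images // batch)                # 31 validation_steps
```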
@@ -283,11 +283,11 @@ def generate_annotations(images, image_dir,
"""Generator for COCO annotations."""
for image in images:
object_annotation = (img_to_obj_annotation.get(image['id'], None) if
img_to_obj_annotation else None)
caption_annotation = (img_to_caption_annotation.get(image['id'], None) if
img_to_caption_annotation else None)
yield (image, image_dir, object_annotation, id_to_name_map,
caption_annotation, include_masks)
......
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TFDS Classification decoders."""
import tensorflow as tf
from official.vision.beta.dataloaders import decoder
class ClassificationDecoder(decoder.Decoder):
"""A tf.Example decoder for tfds classification datasets."""
def decode(self, serialized_example):
sample_dict = {
'image/encoded':
tf.io.encode_jpeg(serialized_example['image'], quality=100),
'image/class/label':
serialized_example['label'],
}
return sample_dict
TFDS_ID_TO_DECODER_MAP = {
'cifar10': ClassificationDecoder,
'cifar100': ClassificationDecoder,
'imagenet2012': ClassificationDecoder,
}
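A small usage sketch of the decoder above; the dataset and `take(1)` call are illustrative, and running it requires the tensorflow_datasets package (and a dataset download).

```python
import tensorflow_datasets as tfds

decoder = ClassificationDecoder()
ds = tfds.load('cifar10', split='train')
for example in ds.take(1):
  decoded = decoder.decode(example)
  # decoded['image/encoded'] holds a JPEG-encoded string scalar and
  # decoded['image/class/label'] the integer class id.
  print(decoded['image/class/label'])
```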