Commit 356c98bd authored by Kaushik Shivakumar

Merge remote-tracking branch 'upstream/master' into detr-push-3

parents d31aba8a b9785623
......@@ -17,12 +17,15 @@
import collections
import json
import os
from absl import logging
import dataclasses
import orbit
import tensorflow as tf
import tensorflow_hub as hub
from official.core import base_task
from official.core import task_factory
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.bert import squad_evaluate_v1_1
......@@ -39,8 +42,7 @@ from official.nlp.tasks import utils
@dataclasses.dataclass
class ModelConfig(base_config.Config):
"""A base span labeler configuration."""
encoder: encoders.TransformerEncoderConfig = (
encoders.TransformerEncoderConfig())
encoder: encoders.EncoderConfig = encoders.EncoderConfig()
@dataclasses.dataclass
......@@ -57,7 +59,7 @@ class QuestionAnsweringConfig(cfg.TaskConfig):
validation_data: cfg.DataConfig = cfg.DataConfig()
@base_task.register_task_cls(QuestionAnsweringConfig)
@task_factory.register_task_cls(QuestionAnsweringConfig)
class QuestionAnsweringTask(base_task.Task):
"""Task object for question answering."""
......@@ -83,17 +85,21 @@ class QuestionAnsweringTask(base_task.Task):
self._tf_record_input_path, self._eval_examples, self._eval_features = (
self._preprocess_eval_data(params.validation_data))
def set_preprocessed_eval_input_path(self, eval_input_path):
"""Sets the path to the preprocessed eval data."""
self._tf_record_input_path = eval_input_path
def build_model(self):
if self._hub_module:
encoder_network = utils.get_encoder_from_hub(self._hub_module)
else:
encoder_network = encoders.instantiate_encoder_from_cfg(
self.task_config.model.encoder)
encoder_network = encoders.build_encoder(self.task_config.model.encoder)
encoder_cfg = self.task_config.model.encoder.get()
# Currently, we only support bert-style question answering finetuning.
return models.BertSpanLabeler(
network=encoder_network,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=self.task_config.model.encoder.initializer_range))
stddev=encoder_cfg.initializer_range))
def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
start_positions = labels['start_positions']
......@@ -241,10 +247,6 @@ class QuestionAnsweringTask(base_task.Task):
step_outputs['end_logits']):
u_ids, s_logits, e_logits = (
unique_ids.numpy(), start_logits.numpy(), end_logits.numpy())
if u_ids.size == 1:
u_ids = [u_ids]
s_logits = [s_logits]
e_logits = [e_logits]
for values in zip(u_ids, s_logits, e_logits):
state.append(self.raw_aggregated_result(
unique_id=values[0],
......@@ -291,16 +293,45 @@ class QuestionAnsweringTask(base_task.Task):
'final_f1': eval_metrics['final_f1']}
return eval_metrics
def initialize(self, model):
"""Load a pretrained checkpoint (if exists) and then train from iter 0."""
ckpt_dir_or_file = self.task_config.init_checkpoint
if tf.io.gfile.isdir(ckpt_dir_or_file):
ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
if not ckpt_dir_or_file:
return
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
status = ckpt.read(ckpt_dir_or_file)
status.expect_partial().assert_existing_objects_matched()
logging.info('Finished loading pretrained checkpoint from %s',
ckpt_dir_or_file)
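# Illustrative sketch of a compatible pretrained checkpoint (hypothetical; it
# mirrors the pretraining setup used in the unit test further below). The keys
# written here must match `model.checkpoint_items` read in `initialize`.
#
#   pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg)
#   ckpt = tf.train.Checkpoint(
#       model=pretrain_model, **pretrain_model.checkpoint_items)
#   ckpt.save(task_config.init_checkpoint)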
def predict(task: QuestionAnsweringTask, params: cfg.DataConfig,
model: tf.keras.Model):
"""Predicts on the input data.
Args:
task: A `QuestionAnsweringTask` object.
params: A `cfg.DataConfig` object.
model: A keras.Model.
Returns:
A tuple of `all_predictions`, `all_nbest` and `scores_diff`, which are
dicts that can be written out as the prediction, nbest and null_odds json
files, respectively.
"""
tf_record_input_path, eval_examples, eval_features = (
task._preprocess_eval_data(params)) # pylint: disable=protected-access
# `tf_record_input_path` will overwrite `params.input_path`
# when `task.build_inputs()` is called.
task.set_preprocessed_eval_input_path(tf_record_input_path)
def predict_step(inputs):
"""Replicated prediction calculation."""
return task.validation_step(inputs, model)
dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(),
task.build_inputs, params)
aggregated_outputs = utils.predict(predict_step, task.aggregate_logs, dataset)
all_predictions, all_nbest, scores_diff = (
task.squad_lib.postprocess_output(
eval_examples,
eval_features,
aggregated_outputs,
task.task_config.n_best_size,
task.task_config.max_answer_length,
task.task_config.validation_data.do_lower_case,
version_2_with_negative=(params.version_2_with_negative),
null_score_diff_threshold=task.task_config.null_score_diff_threshold,
verbose=False))
return all_predictions, all_nbest, scores_diff
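# A minimal usage sketch for `predict` (illustrative only; `config` is a
# hypothetical QuestionAnsweringConfig, mirroring the unit test below):
#
#   task = QuestionAnsweringTask(config)
#   model = task.build_model()
#   all_predictions, all_nbest, scores_diff = predict(
#       task, config.validation_data, model)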
......@@ -25,6 +25,7 @@ from official.nlp.bert import export_tfhub
from official.nlp.configs import bert
from official.nlp.configs import encoders
from official.nlp.data import question_answering_dataloader
from official.nlp.tasks import masked_lm
from official.nlp.tasks import question_answering
......@@ -32,21 +33,37 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
super(QuestionAnsweringTaskTest, self).setUp()
self._encoder_config = encoders.TransformerEncoderConfig(
vocab_size=30522, num_layers=1)
self._encoder_config = encoders.EncoderConfig(
bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1))
self._train_data_config = question_answering_dataloader.QADataConfig(
input_path="dummy",
seq_length=128,
global_batch_size=1)
val_data = {"version": "1.1",
"data": [{"paragraphs": [
{"context": "Sky is blue.",
"qas": [{"question": "What is blue?", "id": "1234",
"answers": [{"text": "Sky", "answer_start": 0},
{"text": "Sky", "answer_start": 0},
{"text": "Sky", "answer_start": 0}]
}]}]}]}
input_path="dummy", seq_length=128, global_batch_size=1)
val_data = {
"version":
"1.1",
"data": [{
"paragraphs": [{
"context":
"Sky is blue.",
"qas": [{
"question":
"What is blue?",
"id":
"1234",
"answers": [{
"text": "Sky",
"answer_start": 0
}, {
"text": "Sky",
"answer_start": 0
}, {
"text": "Sky",
"answer_start": 0
}]
}]
}]
}]
}
self._val_input_path = os.path.join(self.get_temp_dir(), "val_data.json")
with tf.io.gfile.GFile(self._val_input_path, "w") as writer:
writer.write(json.dumps(val_data, indent=4) + "\n")
......@@ -81,23 +98,26 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
val_dataset = task.build_inputs(config.validation_data)
val_iterator = iter(val_dataset)
logs = task.validation_step(next(val_iterator), model, metrics=metrics)
# Mock that `logs` is from one replica.
logs = {x: (logs[x],) for x in logs}
logs = task.aggregate_logs(step_outputs=logs)
metrics = task.reduce_aggregated_logs(logs)
self.assertIn("final_f1", metrics)
@parameterized.parameters(itertools.product(
@parameterized.parameters(
itertools.product(
(False, True),
("WordPiece", "SentencePiece"),
))
def test_task(self, version_2_with_negative, tokenization):
# Saves a checkpoint.
pretrain_cfg = bert.BertPretrainerConfig(
pretrain_cfg = bert.PretrainerConfig(
encoder=self._encoder_config,
cls_heads=[
bert.ClsHeadConfig(
inner_dim=10, num_classes=3, name="next_sentence")
])
pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg)
pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg)
ckpt = tf.train.Checkpoint(
model=pretrain_model, **pretrain_model.checkpoint_items)
saved_path = ckpt.save(self.get_temp_dir())
......@@ -160,6 +180,27 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
validation_data=self._get_validation_data_config())
self._run_task(config)
@parameterized.named_parameters(("squad1", False), ("squad2", True))
def test_predict(self, version_2_with_negative):
validation_data = self._get_validation_data_config(
version_2_with_negative=version_2_with_negative)
config = question_answering.QuestionAnsweringConfig(
model=question_answering.ModelConfig(encoder=self._encoder_config),
train_data=self._train_data_config,
validation_data=validation_data)
task = question_answering.QuestionAnsweringTask(config)
model = task.build_model()
all_predictions, all_nbest, scores_diff = question_answering.predict(
task, validation_data, model)
self.assertLen(all_predictions, 1)
self.assertLen(all_nbest, 1)
if version_2_with_negative:
self.assertLen(scores_diff, 1)
else:
self.assertEmpty(scores_diff)
if __name__ == "__main__":
tf.test.main()
......@@ -26,6 +26,7 @@ import tensorflow as tf
import tensorflow_hub as hub
from official.core import base_task
from official.core import task_factory
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.configs import encoders
......@@ -43,8 +44,7 @@ class ModelConfig(base_config.Config):
"""A classifier/regressor configuration."""
num_classes: int = 0
use_encoder_pooler: bool = False
encoder: encoders.TransformerEncoderConfig = (
encoders.TransformerEncoderConfig())
encoder: encoders.EncoderConfig = encoders.EncoderConfig()
@dataclasses.dataclass
......@@ -62,7 +62,7 @@ class SentencePredictionConfig(cfg.TaskConfig):
validation_data: cfg.DataConfig = cfg.DataConfig()
@base_task.register_task_cls(SentencePredictionConfig)
@task_factory.register_task_cls(SentencePredictionConfig)
class SentencePredictionTask(base_task.Task):
"""Task object for sentence_prediction."""
......@@ -84,15 +84,14 @@ class SentencePredictionTask(base_task.Task):
if self._hub_module:
encoder_network = utils.get_encoder_from_hub(self._hub_module)
else:
encoder_network = encoders.instantiate_encoder_from_cfg(
self.task_config.model.encoder)
encoder_network = encoders.build_encoder(self.task_config.model.encoder)
encoder_cfg = self.task_config.model.encoder.get()
# Currently, we only support bert-style sentence prediction finetuning.
return models.BertClassifier(
network=encoder_network,
num_classes=self.task_config.model.num_classes,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=self.task_config.model.encoder.initializer_range),
stddev=encoder_cfg.initializer_range),
use_encoder_pooler=self.task_config.model.use_encoder_pooler)
def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
......@@ -244,11 +243,7 @@ def predict(task: SentencePredictionTask, params: cfg.DataConfig,
"""
is_regression = task.task_config.model.num_classes == 1
@tf.function
def predict_step(iterator):
"""Predicts on distributed devices."""
def _replicated_step(inputs):
def predict_step(inputs):
"""Replicated prediction calculation."""
x, _ = inputs
outputs = task.inference_step(x, model)
......@@ -257,21 +252,16 @@ def predict(task: SentencePredictionTask, params: cfg.DataConfig,
else:
return tf.argmax(outputs, axis=-1)
outputs = tf.distribute.get_strategy().run(
_replicated_step, args=(next(iterator),))
return tf.nest.map_structure(
tf.distribute.get_strategy().experimental_local_results, outputs)
def reduce_fn(state, outputs):
def aggregate_fn(state, outputs):
"""Concatenates model's outputs."""
if state is None:
state = {'predictions': []}
for per_replica_batch_predictions in outputs:
state.extend(per_replica_batch_predictions)
state['predictions'].extend(per_replica_batch_predictions)
return state
loop_fn = orbit.utils.create_loop_fn(predict_step)
dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(),
task.build_inputs, params)
# Set `num_steps` to -1 to exhaust the dataset.
predictions = loop_fn(
iter(dataset), num_steps=-1, state=[], reduce_fn=reduce_fn)
return predictions
outputs = utils.predict(predict_step, aggregate_fn, dataset)
return outputs['predictions']
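# A minimal usage sketch (illustrative only; `config` is a hypothetical
# SentencePredictionConfig). The result is a flat list with one entry per
# example: a regression score when `num_classes == 1`, else an argmax class id.
#
#   task = SentencePredictionTask(config)
#   model = task.build_model()
#   predictions = predict(task, config.validation_data, model)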
......@@ -26,6 +26,7 @@ from official.nlp.bert import export_tfhub
from official.nlp.configs import bert
from official.nlp.configs import encoders
from official.nlp.data import sentence_prediction_dataloader
from official.nlp.tasks import masked_lm
from official.nlp.tasks import sentence_prediction
......@@ -68,8 +69,8 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
def get_model_config(self, num_classes):
return sentence_prediction.ModelConfig(
encoder=encoders.TransformerEncoderConfig(
vocab_size=30522, num_layers=1),
encoder=encoders.EncoderConfig(
bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)),
num_classes=num_classes)
def _run_task(self, config):
......@@ -102,14 +103,14 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
task.validation_step(next(iterator), model, metrics=metrics)
# Saves a checkpoint.
pretrain_cfg = bert.BertPretrainerConfig(
encoder=encoders.TransformerEncoderConfig(
vocab_size=30522, num_layers=1),
pretrain_cfg = bert.PretrainerConfig(
encoder=encoders.EncoderConfig(
bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)),
cls_heads=[
bert.ClsHeadConfig(
inner_dim=10, num_classes=3, name="next_sentence")
])
pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg)
pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg)
ckpt = tf.train.Checkpoint(
model=pretrain_model, **pretrain_model.checkpoint_items)
ckpt.save(config.init_checkpoint)
......@@ -136,8 +137,8 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
if num_classes == 1:
self.assertIsInstance(metrics[0], tf.keras.metrics.MeanSquaredError)
else:
self.assertIsInstance(
metrics[0], tf.keras.metrics.SparseCategoricalAccuracy)
self.assertIsInstance(metrics[0],
tf.keras.metrics.SparseCategoricalAccuracy)
dataset = task.build_inputs(config.train_data)
iterator = iter(dataset)
......@@ -147,9 +148,9 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
logs = task.validation_step(next(iterator), model, metrics=metrics)
loss = logs["loss"].numpy()
if num_classes == 1:
self.assertAlmostEqual(loss, 42.77483, places=3)
self.assertGreater(loss, 1.0)
else:
self.assertAlmostEqual(loss, 3.57627e-6, places=3)
self.assertLess(loss, 1.0)
@parameterized.parameters(("matthews_corrcoef", 2),
("pearson_spearman_corr", 1))
......
......@@ -14,7 +14,6 @@
# limitations under the License.
# ==============================================================================
"""Tagging (e.g., NER/POS) task."""
import logging
from typing import List, Optional, Tuple
import dataclasses
......@@ -26,6 +25,7 @@ import tensorflow as tf
import tensorflow_hub as hub
from official.core import base_task
from official.core import task_factory
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.configs import encoders
......@@ -37,8 +37,7 @@ from official.nlp.tasks import utils
@dataclasses.dataclass
class ModelConfig(base_config.Config):
"""A base span labeler configuration."""
encoder: encoders.TransformerEncoderConfig = (
encoders.TransformerEncoderConfig())
encoder: encoders.EncoderConfig = encoders.EncoderConfig()
head_dropout: float = 0.1
head_initializer_range: float = 0.02
......@@ -81,7 +80,7 @@ def _masked_labels_and_weights(y_true):
return masked_y_true, tf.cast(mask, tf.float32)
@base_task.register_task_cls(TaggingConfig)
@task_factory.register_task_cls(TaggingConfig)
class TaggingTask(base_task.Task):
"""Task object for tagging (e.g., NER or POS)."""
......@@ -102,8 +101,7 @@ class TaggingTask(base_task.Task):
if self._hub_module:
encoder_network = utils.get_encoder_from_hub(self._hub_module)
else:
encoder_network = encoders.instantiate_encoder_from_cfg(
self.task_config.model.encoder)
encoder_network = encoders.build_encoder(self.task_config.model.encoder)
return models.BertTokenClassifier(
network=encoder_network,
......@@ -215,20 +213,6 @@ class TaggingTask(base_task.Task):
seqeval_metrics.accuracy_score(label_class, predict_class),
}
def initialize(self, model):
"""Load a pretrained checkpoint (if exists) and then train from iter 0."""
ckpt_dir_or_file = self.task_config.init_checkpoint
if tf.io.gfile.isdir(ckpt_dir_or_file):
ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
if not ckpt_dir_or_file:
return
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
status = ckpt.restore(ckpt_dir_or_file)
status.expect_partial().assert_existing_objects_matched()
logging.info('Finished loading pretrained checkpoint from %s',
ckpt_dir_or_file)
def predict(task: TaggingTask, params: cfg.DataConfig,
model: tf.keras.Model) -> Tuple[List[List[int]], List[int]]:
......@@ -246,11 +230,7 @@ def predict(task: TaggingTask, params: cfg.DataConfig,
sentence id of the corresponding example.
"""
@tf.function
def predict_step(iterator):
"""Predicts on distributed devices."""
def _replicated_step(inputs):
def predict_step(inputs):
"""Replicated prediction calculation."""
x, y = inputs
sentence_ids = x.pop('sentence_id')
......@@ -262,14 +242,13 @@ def predict(task: TaggingTask, params: cfg.DataConfig,
label_mask=label_mask,
sentence_ids=sentence_ids)
outputs = tf.distribute.get_strategy().run(
_replicated_step, args=(next(iterator),))
return tf.nest.map_structure(
tf.distribute.get_strategy().experimental_local_results, outputs)
def reduce_fn(state, outputs):
def aggregate_fn(state, outputs):
"""Concatenates model's outputs."""
cur_predict_ids, cur_sentence_ids = state
if state is None:
state = {'predict_ids': [], 'sentence_ids': []}
cur_predict_ids = state['predict_ids']
cur_sentence_ids = state['sentence_ids']
for batch_predict_ids, batch_label_mask, batch_sentence_ids in zip(
outputs['predict_ids'], outputs['label_mask'],
outputs['sentence_ids']):
......@@ -283,12 +262,9 @@ def predict(task: TaggingTask, params: cfg.DataConfig,
# Skip the padding label.
if tmp_label_mask[i]:
cur_predict_ids[-1].append(tmp_predict_ids[i])
return cur_predict_ids, cur_sentence_ids
return state
loop_fn = orbit.utils.create_loop_fn(predict_step)
dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(),
task.build_inputs, params)
# Set `num_steps` to -1 to exhaust the dataset.
predict_ids, sentence_ids = loop_fn(
iter(dataset), num_steps=-1, state=([], []), reduce_fn=reduce_fn)
return predict_ids, sentence_ids
outputs = utils.predict(predict_step, aggregate_fn, dataset)
return outputs['predict_ids'], outputs['sentence_ids']
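# A minimal usage sketch (illustrative only; `config` is a hypothetical
# TaggingConfig): `predict_ids` holds the non-padded tag ids per example and
# `sentence_ids` maps each entry back to its source sentence.
#
#   task = TaggingTask(config)
#   model = task.build_model()
#   predict_ids, sentence_ids = predict(task, config.validation_data, model)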
......@@ -53,8 +53,8 @@ class TaggingTest(tf.test.TestCase):
def setUp(self):
super(TaggingTest, self).setUp()
self._encoder_config = encoders.TransformerEncoderConfig(
vocab_size=30522, num_layers=1)
self._encoder_config = encoders.EncoderConfig(
bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1))
self._train_data_config = tagging_data_loader.TaggingDataConfig(
input_path="dummy", seq_length=128, global_batch_size=1)
......@@ -74,7 +74,7 @@ class TaggingTest(tf.test.TestCase):
def test_task(self):
# Saves a checkpoint.
encoder = encoders.instantiate_encoder_from_cfg(self._encoder_config)
encoder = encoders.build_encoder(self._encoder_config)
ckpt = tf.train.Checkpoint(encoder=encoder)
saved_path = ckpt.save(self.get_temp_dir())
......
......@@ -14,6 +14,9 @@
# limitations under the License.
# ==============================================================================
"""Common utils for tasks."""
from typing import Any, Callable
import orbit
import tensorflow as tf
import tensorflow_hub as hub
......@@ -32,3 +35,34 @@ def get_encoder_from_hub(hub_module: str) -> tf.keras.Model:
return tf.keras.Model(
inputs=[input_word_ids, input_mask, input_type_ids],
outputs=[sequence_output, pooled_output])
def predict(predict_step_fn: Callable[[Any], Any],
aggregate_fn: Callable[[Any, Any], Any],
dataset: tf.data.Dataset):
"""Runs prediction.
Args:
predict_step_fn: A callable such as `def predict_step(inputs)`, where
`inputs` are input tensors.
aggregate_fn: A callable such as `def aggregate_fn(state, value)`, where
`value` is the output of `predict_step_fn` and `state` is `None` on the
first call.
dataset: A `tf.data.Dataset` object.
Returns:
The aggregated predictions.
"""
@tf.function
def predict_step(iterator):
"""Predicts on distributed devices."""
outputs = tf.distribute.get_strategy().run(
predict_step_fn, args=(next(iterator),))
return tf.nest.map_structure(
tf.distribute.get_strategy().experimental_local_results, outputs)
loop_fn = orbit.utils.create_loop_fn(predict_step)
# Set `num_steps` to -1 to exhaust the dataset.
outputs = loop_fn(
iter(dataset), num_steps=-1, state=None, reduce_fn=aggregate_fn) # pytype: disable=wrong-arg-types
return outputs
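# A sketch of the calling contract (illustrative only): `loop_fn` starts with
# `state=None`, so `aggregate_fn` must create the initial state on its first
# call; `value` holds a tuple of per-replica results when `predict_step_fn`
# returns a single tensor.
#
#   def aggregate_fn(state, value):
#     state = state if state is not None else {'outputs': []}
#     for per_replica_output in value:
#       state['outputs'].extend(per_replica_output.numpy())
#     return state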
......@@ -26,7 +26,7 @@ import re
import sys
import unicodedata
from absl import app as absl_app
from absl import app
from absl import flags
import six
from six.moves import range
......@@ -149,4 +149,4 @@ if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
define_compute_bleu_flags()
FLAGS = flags.FLAGS
absl_app.run(main)
app.run(main)
......@@ -23,7 +23,7 @@ import random
import tarfile
# pylint: disable=g-bad-import-order
from absl import app as absl_app
from absl import app
from absl import flags
from absl import logging
import six
......@@ -436,4 +436,4 @@ if __name__ == "__main__":
logging.set_verbosity(logging.INFO)
define_data_download_flags()
FLAGS = flags.FLAGS
absl_app.run(main)
app.run(main)
......@@ -14,32 +14,14 @@
# ==============================================================================
"""Keras layers of XLNet model in TF 2.0."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import copy
import numpy as np
import tensorflow as tf
from official.nlp.xlnet import data_utils
def gelu(x):
"""Gaussian Error Linear Unit.
This is a smoother version of the RELU.
Original paper: https://arxiv.org/abs/1606.08415
Args:
x: float Tensor to perform activation.
Returns:
`x` with the GELU activation applied.
"""
cdf = 0.5 * (1.0 + tf.tanh(
(np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
return x * cdf
return tf.keras.activations.gelu(x, approximate=True)
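# Sanity-check sketch (not part of this module): the tanh approximation removed
# above and `tf.keras.activations.gelu(x, approximate=True)` agree numerically.
#
#   x = tf.constant([-1.0, 0.0, 1.0])
#   manual = x * 0.5 * (1.0 + tf.tanh(
#       np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
#   np.testing.assert_allclose(manual.numpy(), gelu(x).numpy(), rtol=1e-6)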
def rel_shift(x, klen=-1):
......
......@@ -20,7 +20,7 @@ import sys
from setuptools import find_packages
from setuptools import setup
version = '2.2.0'
version = '2.3.0'
project_name = 'tf-models-official'
......@@ -60,7 +60,7 @@ if project_name == 'tf-models-nightly':
version += '.dev' + datetime.datetime.now().strftime('%Y%m%d')
install_requires.append('tf-nightly')
else:
install_requires.append('tensorflow>=2.2.0')
install_requires.append('tensorflow>=2.3.0')
print('install_requires: ', install_requires)
print('dependency_links: ', dependency_links)
......
......@@ -32,7 +32,8 @@ from official.recommendation import movielens
def create_dataset_from_tf_record_files(input_file_pattern,
pre_batch_size,
batch_size,
is_training=True):
is_training=True,
rebatch=False):
"""Creates dataset from (tf)records files for training/evaluation."""
if pre_batch_size != batch_size:
raise ValueError("Pre-batch ({}) size is not equal to batch "
......@@ -51,6 +52,12 @@ def create_dataset_from_tf_record_files(input_file_pattern,
dataset = dataset.map(
decode_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
if rebatch:
# A workaround for TPU Pod evaluation dataset.
# TODO (b/162341937) remove once it's fixed.
dataset = dataset.unbatch()
dataset = dataset.batch(pre_batch_size)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
return dataset
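# Mechanics of the rebatch workaround (illustrative sketch, not library code):
# `unbatch` flattens the pre-batched records and `batch` regroups them so every
# emitted batch has exactly `pre_batch_size` elements again.
#
#   ds = tf.data.Dataset.range(6).batch(2)   # yields [0,1], [2,3], [4,5]
#   ds = ds.unbatch().batch(3)               # yields [0,1,2], [3,4,5]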
......@@ -151,12 +158,18 @@ def create_ncf_input_data(params,
params["train_dataset_path"],
input_meta_data["train_prebatch_size"],
params["batch_size"],
is_training=True)
is_training=True,
rebatch=False)
# Re-batch evaluation dataset for TPU Pods.
# TODO (b/162341937) remove once it's fixed.
eval_rebatch = (params["use_tpu"] and strategy.num_replicas_in_sync > 8)
eval_dataset = create_dataset_from_tf_record_files(
params["eval_dataset_path"],
input_meta_data["eval_prebatch_size"],
params["eval_batch_size"],
is_training=False)
is_training=False,
rebatch=eval_rebatch)
num_train_steps = int(input_meta_data["num_train_steps"])
num_eval_steps = int(input_meta_data["num_eval_steps"])
......
......@@ -235,6 +235,7 @@ def run_ncf(_):
params = ncf_common.parse_flags(FLAGS)
params["distribute_strategy"] = strategy
params["use_tpu"] = (FLAGS.distribution_strategy == "tpu")
if params["use_tpu"] and not params["keras_use_ctl"]:
logging.error("Custom training loop must be used when using TPUStrategy.")
......@@ -491,7 +492,8 @@ def run_ncf_custom_training(params,
logging.info("Done training epoch %s, epoch loss=%.3f", epoch + 1,
train_loss)
eval_input_iterator = iter(eval_input_dataset)
eval_input_iterator = iter(
strategy.experimental_distribute_dataset(eval_input_dataset))
hr_sum = 0.0
hr_count = 0.0
......
......@@ -3,7 +3,7 @@ google-api-python-client>=1.6.7
google-cloud-bigquery>=0.31.0
kaggle>=1.3.9
numpy>=1.15.4
oauth2client>=4.1.2
oauth2client
pandas>=0.22.0
psutil>=5.4.3
py-cpuinfo>=3.3.0
......@@ -21,7 +21,7 @@ pyyaml
# CV related dependencies
opencv-python-headless
Pillow
-e git+https://github.com/cocodataset/cocoapi#egg=pycocotools&subdirectory=PythonAPI
pycocotools
# NLP related dependencies
seqeval
sentencepiece
......@@ -26,6 +26,12 @@ from absl import logging
import tensorflow as tf
from tensorflow.python.eager import monitoring
global_batch_size_gauge = monitoring.IntGauge(
'/tensorflow/training/global_batch_size', 'TF training global batch size')
class BatchTimestamp(object):
"""A structure to store batch time stamp."""
......@@ -60,6 +66,8 @@ class TimeHistory(tf.keras.callbacks.Callback):
self.steps_in_epoch = 0
self.start_time = None
global_batch_size_gauge.get_cell().set(batch_size)
if logdir:
self.summary_writer = tf.summary.create_file_writer(logdir)
else:
......
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mock task for testing."""
import dataclasses
import numpy as np
import tensorflow as tf
from official.core import base_task
from official.core import exp_factory
from official.core import task_factory
from official.modeling.hyperparams import config_definitions as cfg
class MockModel(tf.keras.Model):
def __init__(self, network):
super().__init__()
self.network = network
def call(self, inputs):
outputs = self.network(inputs)
self.add_loss(tf.reduce_mean(outputs))
return outputs
@dataclasses.dataclass
class MockTaskConfig(cfg.TaskConfig):
pass
@task_factory.register_task_cls(MockTaskConfig)
class MockTask(base_task.Task):
"""Mock task object for testing."""
def __init__(self, params=None, logging_dir=None):
super().__init__(params=params, logging_dir=logging_dir)
def build_model(self, *arg, **kwargs):
inputs = tf.keras.layers.Input(shape=(2,), name="random", dtype=tf.float32)
outputs = tf.keras.layers.Dense(1)(inputs)
network = tf.keras.Model(inputs=inputs, outputs=outputs)
return MockModel(network)
def build_metrics(self, training: bool = True):
del training
return [tf.keras.metrics.Accuracy(name="acc")]
def build_inputs(self, params):
def generate_data(_):
x = tf.zeros(shape=(2,), dtype=tf.float32)
label = tf.zeros([1], dtype=tf.int32)
return x, label
dataset = tf.data.Dataset.range(1)
dataset = dataset.repeat()
dataset = dataset.map(
generate_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)
return dataset.prefetch(buffer_size=1).batch(2, drop_remainder=True)
def aggregate_logs(self, state, step_outputs):
if state is None:
state = {}
for key, value in step_outputs.items():
if key not in state:
state[key] = []
state[key].append(
np.concatenate([np.expand_dims(v.numpy(), axis=0) for v in value]))
return state
def reduce_aggregated_logs(self, aggregated_logs):
for k, v in aggregated_logs.items():
aggregated_logs[k] = np.sum(np.stack(v, axis=0))
return aggregated_logs
@exp_factory.register_config_factory("mock")
def mock_experiment() -> cfg.ExperimentConfig:
config = cfg.ExperimentConfig(
task=MockTaskConfig(), trainer=cfg.TrainerConfig())
return config
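# A minimal usage sketch for the mock task (everything referenced is defined
# above in this file):
#
#   task = MockTask(params=MockTaskConfig())
#   model = task.build_model()
#   features, labels = next(iter(task.build_inputs(None)))
#   outputs = model(features)   # shape (2, 1); also registers a mean loss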
......@@ -52,7 +52,6 @@ MASKRCNN_CFG.override({
'anchor_size': 8,
},
'rpn_head': {
'anchors_per_location': 3,
'num_convs': 2,
'num_filters': 256,
'use_separable_conv': False,
......
......@@ -39,7 +39,6 @@ RETINANET_CFG.override({
'max_num_instances': 100,
},
'retinanet_head': {
'anchors_per_location': 9,
'num_convs': 4,
'num_filters': 256,
'use_separable_conv': False,
......
......@@ -62,7 +62,6 @@ SHAPEMASK_CFG.override({
'upsample_factor': 4,
},
'retinanet_head': {
'anchors_per_location': 9,
'num_convs': 4,
'num_filters': 256,
'use_separable_conv': False,
......
......@@ -46,15 +46,15 @@ class Anchor(object):
num_scales: integer number representing intermediate scales added
on each level. For instances, num_scales=2 adds one additional
intermediate anchor scales [2^0, 2^0.5] on each level.
aspect_ratios: list of float numbers representing the aspect raito anchors
aspect_ratios: list of float numbers representing the aspect ratio anchors
added on each level. The number indicates the ratio of width to height.
For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each
scale level.
anchor_size: float number representing the scale of size of the base
anchor to the feature stride 2^level.
image_size: a list of integer numbers or Tensors representing
[height, width] of the input image size.The image_size should be divided
by the largest feature stride 2^max_level.
[height, width] of the input image size. The image_size should be
divisible by the largest feature stride 2^max_level.
"""
self.min_level = min_level
self.max_level = max_level
......@@ -77,8 +77,8 @@ class Anchor(object):
for scale in range(self.num_scales):
for aspect_ratio in self.aspect_ratios:
stride = 2 ** level
intermidate_scale = 2 ** (scale / float(self.num_scales))
base_anchor_size = self.anchor_size * stride * intermidate_scale
intermediate_scale = 2 ** (scale / float(self.num_scales))
base_anchor_size = self.anchor_size * stride * intermediate_scale
aspect_x = aspect_ratio ** 0.5
aspect_y = aspect_ratio ** -0.5
half_anchor_size_x = base_anchor_size * aspect_x / 2.0
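# Worked example (numbers only, illustrative; the y half-size computation is
# assumed symmetric to the x one shown above): with level=3, num_scales=2,
# anchor_size=4 and aspect_ratio=2.0:
#   stride = 2**3 = 8
#   intermediate scales: 2**0 = 1.0 and 2**0.5 ~= 1.414
#   base_anchor_size = 4 * 8 * 1.0 = 32 (and ~45.25 at the second scale)
#   half_anchor_size_x = 32 * sqrt(2) / 2 ~= 22.63
#   half_anchor_size_y = 32 / sqrt(2) / 2 ~= 11.31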
......