Commit 3ce2f61b authored by Kaushik Shivakumar
Browse files

Merge branch 'master' of https://github.com/tensorflow/models into context_tf2

parents bb16d5ca 8e9296ff
......@@ -23,18 +23,26 @@ import tensorflow as tf
import tensorflow_hub as hub
from official.core import base_task
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.bert import input_pipeline
from official.nlp.bert import squad_evaluate_v1_1
from official.nlp.bert import squad_evaluate_v2_0
from official.nlp.bert import tokenization
from official.nlp.configs import encoders
from official.nlp.data import data_loader_factory
from official.nlp.data import squad_lib as squad_lib_wp
from official.nlp.data import squad_lib_sp
from official.nlp.modeling import models
from official.nlp.tasks import utils
@dataclasses.dataclass
class ModelConfig(base_config.Config):
  """Model configuration for the span labeler (question answering) task.

  Attributes:
    encoder: Configuration of the transformer encoder. The task builds the
      encoder network from it and reads its `initializer_range` for the span
      labeling head.
  """
  # Use a default_factory so every ModelConfig gets its own encoder config.
  # A shared default instance would alias state across configs, and
  # Python 3.11+ rejects unhashable dataclass instances as field defaults.
  encoder: encoders.TransformerEncoderConfig = dataclasses.field(
      default_factory=encoders.TransformerEncoderConfig)
@dataclasses.dataclass
class QuestionAnsweringConfig(cfg.TaskConfig):
"""The model config."""
......@@ -44,8 +52,7 @@ class QuestionAnsweringConfig(cfg.TaskConfig):
n_best_size: int = 20
max_answer_length: int = 30
null_score_diff_threshold: float = 0.0
model: encoders.TransformerEncoderConfig = (
encoders.TransformerEncoderConfig())
model: ModelConfig = ModelConfig()
train_data: cfg.DataConfig = cfg.DataConfig()
validation_data: cfg.DataConfig = cfg.DataConfig()
......@@ -81,12 +88,12 @@ class QuestionAnsweringTask(base_task.Task):
encoder_network = utils.get_encoder_from_hub(self._hub_module)
else:
encoder_network = encoders.instantiate_encoder_from_cfg(
self.task_config.model)
self.task_config.model.encoder)
# Currently, we only support bert-style question answering finetuning.
return models.BertSpanLabeler(
network=encoder_network,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=self.task_config.model.initializer_range))
stddev=self.task_config.model.encoder.initializer_range))
def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
start_positions = labels['start_positions']
......@@ -174,20 +181,13 @@ class QuestionAnsweringTask(base_task.Task):
return dataset
if params.is_training:
input_path = params.input_path
dataloader_params = params
else:
input_path = self._tf_record_input_path
dataloader_params = params.replace(input_path=input_path)
batch_size = input_context.get_per_replica_batch_size(
params.global_batch_size) if input_context else params.global_batch_size
# TODO(chendouble): add and use nlp.data.question_answering_dataloader.
dataset = input_pipeline.create_squad_dataset(
input_path,
params.seq_length,
batch_size,
is_training=params.is_training,
input_pipeline_context=input_context)
return dataset
return data_loader_factory.get_data_loader(
dataloader_params).load(input_context)
def build_metrics(self, training=None):
del training
......@@ -289,5 +289,5 @@ class QuestionAnsweringTask(base_task.Task):
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
status = ckpt.read(ckpt_dir_or_file)
status.expect_partial().assert_existing_objects_matched()
logging.info('finished loading pretrained checkpoint from %s',
logging.info('Finished loading pretrained checkpoint from %s',
ckpt_dir_or_file)
......@@ -24,6 +24,7 @@ from official.nlp.bert import configs
from official.nlp.bert import export_tfhub
from official.nlp.configs import bert
from official.nlp.configs import encoders
from official.nlp.data import question_answering_dataloader
from official.nlp.tasks import question_answering
......@@ -33,7 +34,7 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
super(QuestionAnsweringTaskTest, self).setUp()
self._encoder_config = encoders.TransformerEncoderConfig(
vocab_size=30522, num_layers=1)
self._train_data_config = bert.QADataConfig(
self._train_data_config = question_answering_dataloader.QADataConfig(
input_path="dummy",
seq_length=128,
global_batch_size=1)
......@@ -55,7 +56,8 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
writer.write("[PAD]\n[UNK]\n[CLS]\n[SEP]\n[MASK]\nsky\nis\nblue\n")
def _get_validation_data_config(self, version_2_with_negative=False):
return bert.QADevDataConfig(
return question_answering_dataloader.QADataConfig(
is_training=False,
input_path=self._val_input_path,
input_preprocessed_data_path=self.get_temp_dir(),
seq_length=128,
......@@ -91,19 +93,18 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
# Saves a checkpoint.
pretrain_cfg = bert.BertPretrainerConfig(
encoder=self._encoder_config,
num_masked_tokens=20,
cls_heads=[
bert.ClsHeadConfig(
inner_dim=10, num_classes=3, name="next_sentence")
])
pretrain_model = bert.instantiate_bertpretrainer_from_cfg(pretrain_cfg)
pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg)
ckpt = tf.train.Checkpoint(
model=pretrain_model, **pretrain_model.checkpoint_items)
saved_path = ckpt.save(self.get_temp_dir())
config = question_answering.QuestionAnsweringConfig(
init_checkpoint=saved_path,
model=self._encoder_config,
model=question_answering.ModelConfig(encoder=self._encoder_config),
train_data=self._train_data_config,
validation_data=self._get_validation_data_config(
version_2_with_negative))
......@@ -111,7 +112,7 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
def test_task_with_fit(self):
config = question_answering.QuestionAnsweringConfig(
model=self._encoder_config,
model=question_answering.ModelConfig(encoder=self._encoder_config),
train_data=self._train_data_config,
validation_data=self._get_validation_data_config())
task = question_answering.QuestionAnsweringTask(config)
......@@ -154,7 +155,7 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
hub_module_url = self._export_bert_tfhub()
config = question_answering.QuestionAnsweringConfig(
hub_module_url=hub_module_url,
model=self._encoder_config,
model=question_answering.ModelConfig(encoder=self._encoder_config),
train_data=self._train_data_config,
validation_data=self._get_validation_data_config())
self._run_task(config)
......
......@@ -23,12 +23,23 @@ import tensorflow as tf
import tensorflow_hub as hub
from official.core import base_task
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.configs import bert
from official.nlp.data import sentence_prediction_dataloader
from official.nlp.configs import encoders
from official.nlp.data import data_loader_factory
from official.nlp.modeling import models
from official.nlp.tasks import utils
@dataclasses.dataclass
class ModelConfig(base_config.Config):
  """A classifier/regressor configuration.

  Attributes:
    num_classes: Number of output classes passed to the classifier head.
    use_encoder_pooler: Forwarded to the classifier's `use_encoder_pooler`
      argument.
    encoder: Configuration of the transformer encoder. The task builds the
      encoder network from it and reads its `initializer_range` for the
      classifier head.
  """
  num_classes: int = 0
  use_encoder_pooler: bool = False
  # Use a default_factory so every ModelConfig gets its own encoder config.
  # A shared default instance would alias state across configs, and
  # Python 3.11+ rejects unhashable dataclass instances as field defaults.
  encoder: encoders.TransformerEncoderConfig = dataclasses.field(
      default_factory=encoders.TransformerEncoderConfig)
@dataclasses.dataclass
class SentencePredictionConfig(cfg.TaskConfig):
"""The model config."""
......@@ -38,15 +49,8 @@ class SentencePredictionConfig(cfg.TaskConfig):
init_cls_pooler: bool = False
hub_module_url: str = ''
metric_type: str = 'accuracy'
model: bert.BertPretrainerConfig = bert.BertPretrainerConfig(
num_masked_tokens=0, # No masked language modeling head.
cls_heads=[
bert.ClsHeadConfig(
inner_dim=768,
num_classes=3,
dropout_rate=0.1,
name='sentence_prediction')
])
# Defines the concrete model config at instantiation time.
model: ModelConfig = ModelConfig()
train_data: cfg.DataConfig = cfg.DataConfig()
validation_data: cfg.DataConfig = cfg.DataConfig()
......@@ -68,21 +72,26 @@ class SentencePredictionTask(base_task.Task):
def build_model(self):
if self._hub_module:
encoder_from_hub = utils.get_encoder_from_hub(self._hub_module)
return bert.instantiate_bertpretrainer_from_cfg(
self.task_config.model, encoder_network=encoder_from_hub)
encoder_network = utils.get_encoder_from_hub(self._hub_module)
else:
return bert.instantiate_bertpretrainer_from_cfg(self.task_config.model)
encoder_network = encoders.instantiate_encoder_from_cfg(
self.task_config.model.encoder)
# Currently, we only support bert-style sentence prediction finetuning.
return models.BertClassifier(
network=encoder_network,
num_classes=self.task_config.model.num_classes,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=self.task_config.model.encoder.initializer_range),
use_encoder_pooler=self.task_config.model.use_encoder_pooler)
def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
loss = tf.keras.losses.sparse_categorical_crossentropy(
labels,
tf.cast(model_outputs['sentence_prediction'], tf.float32),
from_logits=True)
labels, tf.cast(model_outputs, tf.float32), from_logits=True)
if aux_losses:
loss += tf.add_n(aux_losses)
return loss
return tf.reduce_mean(loss)
def build_inputs(self, params, input_context=None):
"""Returns tf.data.Dataset for sentence_prediction task."""
......@@ -103,8 +112,7 @@ class SentencePredictionTask(base_task.Task):
dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)
return dataset
return sentence_prediction_dataloader.SentencePredictionDataLoader(
params).load(input_context)
return data_loader_factory.get_data_loader(params).load(input_context)
def build_metrics(self, training=None):
del training
......@@ -113,10 +121,10 @@ class SentencePredictionTask(base_task.Task):
def process_metrics(self, metrics, labels, model_outputs):
for metric in metrics:
metric.update_state(labels, model_outputs['sentence_prediction'])
metric.update_state(labels, model_outputs)
def process_compiled_metrics(self, compiled_metrics, labels, model_outputs):
compiled_metrics.update_state(labels, model_outputs['sentence_prediction'])
compiled_metrics.update_state(labels, model_outputs)
def validation_step(self, inputs, model: tf.keras.Model, metrics=None):
if self.metric_type == 'accuracy':
......@@ -130,15 +138,13 @@ class SentencePredictionTask(base_task.Task):
if self.metric_type == 'matthews_corrcoef':
logs.update({
'sentence_prediction':
tf.expand_dims(
tf.math.argmax(outputs['sentence_prediction'], axis=1),
axis=0),
tf.expand_dims(tf.math.argmax(outputs, axis=1), axis=0),
'labels':
labels,
})
if self.metric_type == 'pearson_spearman_corr':
logs.update({
'sentence_prediction': outputs['sentence_prediction'],
'sentence_prediction': outputs,
'labels': labels,
})
return logs
......@@ -190,5 +196,5 @@ class SentencePredictionTask(base_task.Task):
ckpt = tf.train.Checkpoint(**pretrain2finetune_mapping)
status = ckpt.read(ckpt_dir_or_file)
status.expect_partial().assert_existing_objects_matched()
logging.info('finished loading pretrained checkpoint from %s',
logging.info('Finished loading pretrained checkpoint from %s',
ckpt_dir_or_file)
......@@ -24,6 +24,7 @@ from official.nlp.bert import configs
from official.nlp.bert import export_tfhub
from official.nlp.configs import bert
from official.nlp.configs import encoders
from official.nlp.data import sentence_prediction_dataloader
from official.nlp.tasks import sentence_prediction
......@@ -31,20 +32,15 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
def setUp(self):
super(SentencePredictionTaskTest, self).setUp()
self._train_data_config = bert.SentencePredictionDataConfig(
input_path="dummy", seq_length=128, global_batch_size=1)
self._train_data_config = (
sentence_prediction_dataloader.SentencePredictionDataConfig(
input_path="dummy", seq_length=128, global_batch_size=1))
def get_model_config(self, num_classes):
return bert.BertPretrainerConfig(
return sentence_prediction.ModelConfig(
encoder=encoders.TransformerEncoderConfig(
vocab_size=30522, num_layers=1),
num_masked_tokens=0,
cls_heads=[
bert.ClsHeadConfig(
inner_dim=10,
num_classes=num_classes,
name="sentence_prediction")
])
num_classes=num_classes)
def _run_task(self, config):
task = sentence_prediction.SentencePredictionTask(config)
......@@ -79,12 +75,11 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
pretrain_cfg = bert.BertPretrainerConfig(
encoder=encoders.TransformerEncoderConfig(
vocab_size=30522, num_layers=1),
num_masked_tokens=20,
cls_heads=[
bert.ClsHeadConfig(
inner_dim=10, num_classes=3, name="next_sentence")
])
pretrain_model = bert.instantiate_bertpretrainer_from_cfg(pretrain_cfg)
pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg)
ckpt = tf.train.Checkpoint(
model=pretrain_model, **pretrain_model.checkpoint_items)
ckpt.save(config.init_checkpoint)
......
......@@ -15,9 +15,10 @@
# ==============================================================================
"""Tagging (e.g., NER/POS) task."""
import logging
from typing import List, Optional
from typing import List, Optional, Tuple
import dataclasses
import orbit
from seqeval import metrics as seqeval_metrics
......@@ -25,21 +26,30 @@ import tensorflow as tf
import tensorflow_hub as hub
from official.core import base_task
from official.modeling.hyperparams import base_config
from official.modeling.hyperparams import config_definitions as cfg
from official.nlp.configs import encoders
from official.nlp.data import tagging_data_loader
from official.nlp.data import data_loader_factory
from official.nlp.modeling import models
from official.nlp.tasks import utils
@dataclasses.dataclass
class ModelConfig(base_config.Config):
  """A base tagging (token classification) model configuration.

  Attributes:
    encoder: Configuration of the transformer encoder that the task builds
      the encoder network from.
    head_dropout: Dropout rate passed to the token classification head.
    head_initializer_range: Stddev of the truncated-normal initializer used
      for the token classification head.
  """
  # Use a default_factory so every ModelConfig gets its own encoder config.
  # A shared default instance would alias state across configs, and
  # Python 3.11+ rejects unhashable dataclass instances as field defaults.
  encoder: encoders.TransformerEncoderConfig = dataclasses.field(
      default_factory=encoders.TransformerEncoderConfig)
  head_dropout: float = 0.1
  head_initializer_range: float = 0.02
@dataclasses.dataclass
class TaggingConfig(cfg.TaskConfig):
"""The model config."""
# At most one of `init_checkpoint` and `hub_module_url` can be specified.
init_checkpoint: str = ''
hub_module_url: str = ''
model: encoders.TransformerEncoderConfig = (
encoders.TransformerEncoderConfig())
model: ModelConfig = ModelConfig()
# The real class names, the order of which should match real label id.
# Note that a word may be tokenized into multiple word_pieces tokens, and
......@@ -93,14 +103,14 @@ class TaggingTask(base_task.Task):
encoder_network = utils.get_encoder_from_hub(self._hub_module)
else:
encoder_network = encoders.instantiate_encoder_from_cfg(
self.task_config.model)
self.task_config.model.encoder)
return models.BertTokenClassifier(
network=encoder_network,
num_classes=len(self.task_config.class_names),
initializer=tf.keras.initializers.TruncatedNormal(
stddev=self.task_config.model.initializer_range),
dropout_rate=self.task_config.model.dropout_rate,
stddev=self.task_config.model.head_initializer_range),
dropout_rate=self.task_config.model.head_dropout,
output='logits')
def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
......@@ -113,7 +123,7 @@ class TaggingTask(base_task.Task):
loss = tf.math.divide_no_nan(numerator_loss, denominator_loss)
return loss
def build_inputs(self, params, input_context=None):
def build_inputs(self, params: cfg.DataConfig, input_context=None):
"""Returns tf.data.Dataset for sentence_prediction task."""
if params.input_path == 'dummy':
......@@ -138,8 +148,12 @@ class TaggingTask(base_task.Task):
dummy_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)
return dataset
dataset = tagging_data_loader.TaggingDataLoader(params).load(input_context)
return dataset
return data_loader_factory.get_data_loader(params).load(input_context)
def inference_step(self, inputs, model: tf.keras.Model):
  """Runs the model in inference mode and derives the predicted ids.

  Args:
    inputs: The features fed to `model`.
    model: The keras model to run; it is called with `training=False`.

  Returns:
    A dict with the model output under 'logits' and its argmax over the
    last axis under 'predict_ids'.
  """
  token_logits = model(inputs, training=False)
  predicted = tf.argmax(token_logits, axis=-1)
  return {'logits': token_logits, 'predict_ids': predicted}
def validation_step(self, inputs, model: tf.keras.Model, metrics=None):
"""Validatation step.
......@@ -154,12 +168,11 @@ class TaggingTask(base_task.Task):
"""
features, labels = inputs
outputs = self.inference_step(features, model)
loss = self.build_losses(labels=labels, model_outputs=outputs)
loss = self.build_losses(labels=labels, model_outputs=outputs['logits'])
# Negative label ids are padding labels which should be ignored.
real_label_index = tf.where(tf.greater_equal(labels, 0))
predict_ids = tf.math.argmax(outputs, axis=-1)
predict_ids = tf.gather_nd(predict_ids, real_label_index)
predict_ids = tf.gather_nd(outputs['predict_ids'], real_label_index)
label_ids = tf.gather_nd(labels, real_label_index)
return {
self.loss: loss,
......@@ -213,5 +226,69 @@ class TaggingTask(base_task.Task):
ckpt = tf.train.Checkpoint(**model.checkpoint_items)
status = ckpt.restore(ckpt_dir_or_file)
status.expect_partial().assert_existing_objects_matched()
logging.info('finished loading pretrained checkpoint from %s',
logging.info('Finished loading pretrained checkpoint from %s',
ckpt_dir_or_file)
def predict(task: TaggingTask, params: cfg.DataConfig,
            model: tf.keras.Model) -> Tuple[List[List[int]], List[int]]:
  """Predicts on the input data.

  Args:
    task: A `TaggingTask` object.
    params: A `cfg.DataConfig` object.
    model: A keras.Model.

  Returns:
    A tuple of `predict_ids` and `sentence_ids`, which are list with length
    of `num_examples`. Each element in `predict_ids` is a sequence of
    predicted per-word label id, and each element in `sentence_ids` is the
    sentence id of the corresponding example.
  """
  # The strategy in scope does not change during this call, so look it up once.
  strategy = tf.distribute.get_strategy()

  @tf.function
  def predict_step(iterator):
    """Predicts on distributed devices."""

    def _replicated_step(inputs):
      """Replicated prediction calculation."""
      x, y = inputs
      # The sentence id is bookkeeping only; remove it before calling the model.
      sentence_ids = x.pop('sentence_id')
      outputs = task.inference_step(x, model)
      predict_ids = outputs['predict_ids']
      # Negative label ids mark positions that must be skipped.
      label_mask = tf.greater_equal(y, 0)
      return dict(
          predict_ids=predict_ids,
          label_mask=label_mask,
          sentence_ids=sentence_ids)

    # `Strategy.run` replaces the deprecated `experimental_run_v2`.
    outputs = strategy.run(_replicated_step, args=(next(iterator),))
    return tf.nest.map_structure(strategy.experimental_local_results, outputs)

  def reduce_fn(state, outputs):
    """Concatenates model's outputs."""
    cur_predict_ids, cur_sentence_ids = state
    # Outer loop: one entry per local replica result; inner loop: per example.
    for batch_predict_ids, batch_label_mask, batch_sentence_ids in zip(
        outputs['predict_ids'], outputs['label_mask'],
        outputs['sentence_ids']):
      for tmp_predict_ids, tmp_label_mask, tmp_sentence_id in zip(
          batch_predict_ids.numpy(), batch_label_mask.numpy(),
          batch_sentence_ids.numpy()):
        cur_sentence_ids.append(tmp_sentence_id)
        assert len(tmp_predict_ids) == len(tmp_label_mask)
        # Keep only the predictions at non-padding label positions.
        cur_predict_ids.append([
            predict_id
            for predict_id, is_real in zip(tmp_predict_ids, tmp_label_mask)
            if is_real
        ])
    return cur_predict_ids, cur_sentence_ids

  loop_fn = orbit.utils.create_loop_fn(predict_step)
  dataset = orbit.utils.make_distributed_dataset(strategy, task.build_inputs,
                                                 params)
  # Set `num_steps` to -1 to exhaust the dataset.
  predict_ids, sentence_ids = loop_fn(
      iter(dataset), num_steps=-1, state=([], []), reduce_fn=reduce_fn)
  return predict_ids, sentence_ids
......@@ -16,22 +16,46 @@
"""Tests for official.nlp.tasks.tagging."""
import functools
import os
import numpy as np
import tensorflow as tf
from official.nlp.bert import configs
from official.nlp.bert import export_tfhub
from official.nlp.configs import bert
from official.nlp.configs import encoders
from official.nlp.data import tagging_data_loader
from official.nlp.tasks import tagging
def _create_fake_dataset(output_path, seq_length, num_labels, num_examples):
  """Creates a fake tagging dataset and writes it as a TFRecord file.

  Args:
    output_path: Path of the TFRecord file to write.
    seq_length: Number of token positions generated for each example.
    num_labels: Number of label classes; generated label ids fall in
      [-1, num_labels - 1].
    num_examples: Number of examples to write; example `i` gets sentence id
      `i`.
  """

  def create_int_feature(values):
    """Wraps an iterable of ints into an int64 `tf.train.Feature`."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=list(values)))

  # The context manager closes the writer even if building an example fails.
  with tf.io.TFRecordWriter(output_path) as writer:
    for i in range(num_examples):
      input_ids = np.random.randint(100, size=(seq_length))
      features = {
          "input_ids": create_int_feature(input_ids),
          "input_mask": create_int_feature(np.ones_like(input_ids)),
          "segment_ids": create_int_feature(np.ones_like(input_ids)),
          # `randint` excludes its upper bound, so this draws from
          # [-1, num_labels - 1], the same range the deprecated
          # `np.random.random_integers(-1, num_labels - 1)` produced.
          "label_ids": create_int_feature(
              np.random.randint(-1, num_labels, size=(seq_length))),
          "sentence_id": create_int_feature([i]),
      }
      tf_example = tf.train.Example(
          features=tf.train.Features(feature=features))
      writer.write(tf_example.SerializeToString())
class TaggingTest(tf.test.TestCase):
def setUp(self):
super(TaggingTest, self).setUp()
self._encoder_config = encoders.TransformerEncoderConfig(
vocab_size=30522, num_layers=1)
self._train_data_config = bert.TaggingDataConfig(
self._train_data_config = tagging_data_loader.TaggingDataConfig(
input_path="dummy", seq_length=128, global_batch_size=1)
def _run_task(self, config):
......@@ -56,7 +80,7 @@ class TaggingTest(tf.test.TestCase):
config = tagging.TaggingConfig(
init_checkpoint=saved_path,
model=self._encoder_config,
model=tagging.ModelConfig(encoder=self._encoder_config),
train_data=self._train_data_config,
class_names=["O", "B-PER", "I-PER"])
task = tagging.TaggingTask(config)
......@@ -72,7 +96,7 @@ class TaggingTest(tf.test.TestCase):
def test_task_with_fit(self):
config = tagging.TaggingConfig(
model=self._encoder_config,
model=tagging.ModelConfig(encoder=self._encoder_config),
train_data=self._train_data_config,
class_names=["O", "B-PER", "I-PER"])
......@@ -115,14 +139,13 @@ class TaggingTest(tf.test.TestCase):
hub_module_url = self._export_bert_tfhub()
config = tagging.TaggingConfig(
hub_module_url=hub_module_url,
model=self._encoder_config,
class_names=["O", "B-PER", "I-PER"],
train_data=self._train_data_config)
self._run_task(config)
def test_seqeval_metrics(self):
config = tagging.TaggingConfig(
model=self._encoder_config,
model=tagging.ModelConfig(encoder=self._encoder_config),
train_data=self._train_data_config,
class_names=["O", "B-PER", "I-PER"])
task = tagging.TaggingTask(config)
......@@ -141,6 +164,34 @@ class TaggingTest(tf.test.TestCase):
self.assertCountEqual({"f1", "precision", "recall", "accuracy"},
task.reduce_aggregated_logs(aggregated).keys())
def test_predict(self):
  """Checks that `tagging.predict` yields one result per written example."""
  example_count = 100
  sequence_len = 16
  record_path = os.path.join(self.get_temp_dir(), "test.tf_record")

  config = tagging.TaggingConfig(
      model=tagging.ModelConfig(encoder=self._encoder_config),
      train_data=self._train_data_config,
      class_names=["O", "B-PER", "I-PER"])
  tagging_task = tagging.TaggingTask(config)
  keras_model = tagging_task.build_model()

  # Write the fake records that the prediction loop will read back.
  _create_fake_dataset(
      record_path,
      seq_length=sequence_len,
      num_labels=len(config.class_names),
      num_examples=example_count)

  # With `drop_remainder=False` the final partial batch is kept, so every
  # example is expected in the output.
  eval_data_config = tagging_data_loader.TaggingDataConfig(
      input_path=record_path,
      seq_length=sequence_len,
      is_training=False,
      global_batch_size=16,
      drop_remainder=False,
      include_sentence_id=True)

  predicted, ids = tagging.predict(tagging_task, eval_data_config, keras_model)
  self.assertLen(predicted, example_count)
  self.assertLen(ids, example_count)
if __name__ == "__main__":
tf.test.main()
......@@ -52,7 +52,6 @@ def create_model(params, is_train):
logits = tf.keras.layers.Lambda(lambda x: x, name="logits",
dtype=tf.float32)(logits)
model = tf.keras.Model([inputs, targets], logits)
# TODO(reedwm): Can we do this loss in float16 instead of float32?
loss = metrics.transformer_loss(
logits, targets, label_smoothing, vocab_size)
model.add_loss(loss)
......@@ -238,7 +237,6 @@ class Transformer(tf.keras.Model):
decoder_self_attention_bias = model_utils.get_decoder_self_attention_bias(
max_decode_length, dtype=self.params["dtype"])
# TODO(b/139770046): Refactor code with better naming of i.
def symbols_to_logits_fn(ids, i, cache):
"""Generate logits for next potential IDs.
......
......@@ -248,7 +248,6 @@ class TransformerTask(object):
callbacks = [cb for cb in callbacks
if isinstance(cb, keras_utils.TimeHistory)]
# TODO(b/139418525): Refactor the custom training loop logic.
@tf.function
def train_steps(iterator, steps):
"""Training steps function for TPU runs.
......@@ -422,8 +421,6 @@ class TransformerTask(object):
"""Loads model weights when it is provided."""
if init_weight_path:
logging.info("Load weights: {}".format(init_weight_path))
# TODO(b/139414977): Having the same variable restoring method for both
# TPU and GPU.
if self.use_tpu:
checkpoint = tf.train.Checkpoint(
model=model, optimizer=self._create_optimizer())
......
......@@ -67,7 +67,7 @@ def padded_cross_entropy_loss(logits, labels, smoothing, vocab_size):
# Calculate smoothing cross entropy
with tf.name_scope("smoothing_cross_entropy", values=[logits, labels]):
confidence = 1.0 - smoothing
low_confidence = (1.0 - confidence) / tf.to_float(vocab_size - 1)
low_confidence = (1.0 - confidence) / tf.cast(vocab_size - 1, tf.float32)
soft_targets = tf.one_hot(
tf.cast(labels, tf.int32),
depth=vocab_size,
......@@ -79,11 +79,11 @@ def padded_cross_entropy_loss(logits, labels, smoothing, vocab_size):
# Calculate the best (lowest) possible value of cross entropy, and
# subtract from the cross entropy loss.
normalizing_constant = -(
confidence * tf.log(confidence) + tf.to_float(vocab_size - 1) *
low_confidence * tf.log(low_confidence + 1e-20))
confidence * tf.log(confidence) + tf.cast(vocab_size - 1, tf.float32)
* low_confidence * tf.log(low_confidence + 1e-20))
xentropy -= normalizing_constant
weights = tf.to_float(tf.not_equal(labels, 0))
weights = tf.cast(tf.not_equal(labels, 0), tf.float32)
return xentropy * weights, weights
......@@ -142,24 +142,24 @@ def padded_accuracy(logits, labels):
"""Percentage of times that predictions matches labels on non-0s."""
with tf.variable_scope("padded_accuracy", values=[logits, labels]):
logits, labels = _pad_tensors_to_same_length(logits, labels)
weights = tf.to_float(tf.not_equal(labels, 0))
outputs = tf.to_int32(tf.argmax(logits, axis=-1))
padded_labels = tf.to_int32(labels)
return tf.to_float(tf.equal(outputs, padded_labels)), weights
weights = tf.cast(tf.not_equal(labels, 0), tf.float32)
outputs = tf.cast(tf.argmax(logits, axis=-1), tf.int32)
padded_labels = tf.cast(labels, tf.int32)
return tf.cast(tf.equal(outputs, padded_labels), tf.float32), weights
def padded_accuracy_topk(logits, labels, k):
  """Percentage of times that top-k predictions matches labels on non-0s.

  Args:
    logits: Tensor of prediction scores; the top-k candidates are taken along
      its last axis.
    labels: Tensor of target ids. Entries equal to 0 receive weight 0.
    k: Number of top candidates to consider. It is capped at the size of the
      last axis of `logits`.

  Returns:
    A `(same_topk, weights)` tuple. `same_topk` sums, over the k candidates,
    a float32 indicator of whether the candidate equals the label, and
    `weights` is a float32 tensor that is 1.0 where `labels != 0` and 0.0
    elsewhere.
  """
  with tf.variable_scope("padded_accuracy_topk", values=[logits, labels]):
    logits, labels = _pad_tensors_to_same_length(logits, labels)
    weights = tf.cast(tf.not_equal(labels, 0), tf.float32)
    # Never ask top_k for more candidates than the last axis can provide.
    effective_k = tf.minimum(k, tf.shape(logits)[-1])
    _, outputs = tf.nn.top_k(logits, k=effective_k)
    outputs = tf.cast(outputs, tf.int32)
    padded_labels = tf.cast(labels, tf.int32)
    padded_labels = tf.expand_dims(padded_labels, axis=-1)
    padded_labels += tf.zeros_like(outputs)  # Pad to same shape.
    same = tf.cast(tf.equal(outputs, padded_labels), tf.float32)
    same_topk = tf.reduce_sum(same, axis=-1)
    return same_topk, weights
......@@ -172,10 +172,11 @@ def padded_sequence_accuracy(logits, labels):
"""Percentage of times that predictions matches labels everywhere (non-0)."""
with tf.variable_scope("padded_sequence_accuracy", values=[logits, labels]):
logits, labels = _pad_tensors_to_same_length(logits, labels)
weights = tf.to_float(tf.not_equal(labels, 0))
outputs = tf.to_int32(tf.argmax(logits, axis=-1))
padded_labels = tf.to_int32(labels)
not_correct = tf.to_float(tf.not_equal(outputs, padded_labels)) * weights
weights = tf.cast(tf.not_equal(labels, 0), tf.float32)
outputs = tf.cast(tf.argmax(logits, axis=-1), tf.int32)
padded_labels = tf.cast(labels, tf.int32)
not_correct = (tf.cast(tf.not_equal(outputs, padded_labels), tf.float32) *
weights)
axis = list(range(1, len(outputs.get_shape())))
correct_seq = 1.0 - tf.minimum(1.0, tf.reduce_sum(not_correct, axis=axis))
return correct_seq, tf.constant(1.0)
......@@ -201,7 +202,7 @@ def bleu_score(logits, labels):
Returns:
bleu: int, approx bleu score
"""
predictions = tf.to_int32(tf.argmax(logits, axis=-1))
predictions = tf.cast(tf.argmax(logits, axis=-1), tf.int32)
# TODO: Look into removing use of py_func
bleu = tf.py_func(compute_bleu, (labels, predictions), tf.float32)
return bleu, tf.constant(1.0)
......@@ -306,7 +307,7 @@ def rouge_2_fscore(logits, labels):
Returns:
rouge2_fscore: approx rouge-2 f1 score.
"""
predictions = tf.to_int32(tf.argmax(logits, axis=-1))
predictions = tf.cast(tf.argmax(logits, axis=-1), tf.int32)
# TODO: Look into removing use of py_func
rouge_2_f_score = tf.py_func(rouge_n, (predictions, labels), tf.float32)
return rouge_2_f_score, tf.constant(1.0)
......@@ -383,7 +384,7 @@ def rouge_l_fscore(predictions, labels):
Returns:
rouge_l_fscore: approx rouge-l f1 score.
"""
outputs = tf.to_int32(tf.argmax(predictions, axis=-1))
outputs = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)
rouge_l_f_score = tf.py_func(rouge_l_sentence_level, (outputs, labels),
tf.float32)
return rouge_l_f_score, tf.constant(1.0)
......
......@@ -15,7 +15,6 @@ tensorflow-addons
dataclasses
gin-config
tf_slim>=1.1.0
typing
Cython
matplotlib
pyyaml
......
......@@ -57,7 +57,7 @@ class RetinanetModel(base_model.Model):
params.postprocess)
self._transpose_input = params.train.transpose_input
assert not self._transpose_input, 'Transpose input is not supportted.'
assert not self._transpose_input, 'Transpose input is not supported.'
# Input layer.
input_shape = (
params.retinanet_parser.output_size +
......
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
\ No newline at end of file
![TensorFlow Requirement: 2.x](https://img.shields.io/badge/TensorFlow%20Requirement-2.x-brightgreen)
# Orbit
Orbit is a customized training loop library built on top of TensorFlow 2. It
provides a flexible lightweight library that users can easily use or fork when
writing [customized training loop code](https://www.tensorflow.org/tutorials/distribute/custom_training)
in TF2. It integrates with `tf.distribute` seamlessly and supports running on
different device types (CPU, GPU, and TPU).
# Copyright 2020 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from orbit import utils
from orbit.controller import Controller
from orbit.runner import *
from orbit.standard_runner import *
This diff is collapsed.
This diff is collapsed.
# Copyright 2020 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""An abstraction that lets users easily handle their custom training loops."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import abc
from typing import Dict, Optional, Text
import six
import tensorflow as tf
@six.add_metaclass(abc.ABCMeta)
class AbstractTrainer(tf.Module):
  """Interface that a trainer must implement to be driven step-wise."""

  @abc.abstractmethod
  def train(
      self, num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]:
    """Runs model training for multiple steps.

    The total number of training steps is commonly split into several
    training loops so that checkpointing, summary writing and python
    callbacks can happen in between. This matters for TPU performance,
    because launching a multi worker tf.function may carry a large overhead
    in Eager mode. In the TPU case it is usually encouraged to build a host
    training loop (e.g. a `tf.range` wrapping `strategy.run` inside a
    `tf.function`). When a host training loop is not needed to achieve peak
    performance, a simple python loop driving each step is enough.

    Args:
      num_steps: A guideline for how many training steps to run. What
        constitutes a "step" is up to the model (it may involve more than one
        update to model parameters, e.g. if training a GAN).

    Returns:
      Optionally, a dictionary of `Tensors` or numpy arrays, which will be
      written to logs and as TensorBoard summaries.
    """
@six.add_metaclass(abc.ABCMeta)
class AbstractEvaluator(tf.Module):
  """Interface that an evaluator must implement to be driven step-wise."""

  @abc.abstractmethod
  def evaluate(self,
               num_steps: Optional[tf.Tensor]
              ) -> Optional[Dict[Text, tf.Tensor]]:
    """Runs model evaluation.

    Args:
      num_steps: A guideline for how many evaluation steps to run. What
        constitutes a "step" is up to the model. Generally, it may be
        desirable to support both a limited number of eval steps and, when
        `num_steps` is `None`, iterating over a full dataset (however many
        steps are required).

    Returns:
      Optionally, a dictionary of `Tensors` or numpy arrays, which will be
      written to logs and as TensorBoard summaries.
    """
# Copyright 2020 The Orbit Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""An abstraction that lets users easily handle their custom training loops."""
from __future__ import absolute_import
from __future__ import division
# from __future__ import google_type_annotations
from __future__ import print_function
import abc
from typing import Any, Dict, Optional, Text
from orbit import runner
from orbit import utils
import six
import tensorflow as tf
@six.add_metaclass(abc.ABCMeta)
class StandardTrainer(runner.AbstractTrainer):
  """Standard `AbstractTrainer` implementation built around `train_step`.

  Subclasses implement `train_step`; `train_loop_begin` and `train_loop_end`
  are optional hooks that do nothing by default.
  """

  def __init__(self,
               train_dataset,
               use_tf_while_loop=True,
               use_tf_function=True,
               use_tpu_summary_optimization=False):
    """Constructs a `StandardTrainer` object.

    Args:
      train_dataset: A tf.nest-compatible structure of tf.data.Dataset or
        DistributedDataset.
      use_tf_while_loop: Whether to wrap the train step with a
        `tf.while_loop`.
      use_tf_function: Whether a `tf.function` will be used. If False,
        training runs in pure eager mode.
      use_tpu_summary_optimization: Whether to enable the performance
        optimization for summaries on TPUs. On TPUs, writing summaries with
        outside compilation inside the train step is slow. If True, two
        `tf.function`s with two XLA programs are created, one with summaries
        and one without, and the (slow) program with summaries is run only if
        necessary.

    Raises:
      ValueError: If `use_tf_while_loop` is set without `use_tf_function`, or
        `use_tpu_summary_optimization` is set without `use_tf_while_loop`.
    """
    if use_tf_while_loop and not use_tf_function:
      raise ValueError("`use_tf_while_loop=True` and `use_tf_function=False` "
                       "is not supported")
    if use_tpu_summary_optimization and not use_tf_while_loop:
      raise ValueError("`use_tpu_summary_optimization=True` and "
                       "`use_tf_while_loop=False` is not supported")

    self._use_tf_while_loop = use_tf_while_loop
    self._use_tf_function = use_tf_function
    self._train_dataset = train_dataset
    # Both are created lazily on the first `train()` call.
    self._train_iter = None
    self._train_loop_fn = None
    self._use_tpu_summary_optimization = use_tpu_summary_optimization

  def train(
      self, num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]:
    """See base class."""
    self.train_loop_begin()

    # The iterator is kept across calls; it is only rebuilt after the
    # `train_dataset` setter has reset it.
    if self._train_iter is None:
      self._train_iter = tf.nest.map_structure(iter, self.train_dataset)

    # The loop function is built once and then reused.
    if self._train_loop_fn is None:
      step_fn = self.train_step
      if not self._use_tf_while_loop:
        # Python-driven loop, optionally compiling each individual step.
        if self._use_tf_function:
          step_fn = tf.function(step_fn)
        self._train_loop_fn = utils.create_loop_fn(step_fn)
      else:
        self._train_loop_fn = utils.create_tf_while_loop_fn(step_fn)
        wrap_loop = (
            utils.train_function_with_summaries
            if self._use_tpu_summary_optimization else tf.function)
        self._train_loop_fn = wrap_loop(self._train_loop_fn)

    self._train_loop_fn(self._train_iter, num_steps)
    return self.train_loop_end()

  def train_loop_begin(self):
    """Hook invoked once at the beginning of the training loop.

    It runs before the dataset iterators are created, which makes it a good
    place to reset metrics that accumulate values over multiple steps of
    training.
    """

  @abc.abstractmethod
  def train_step(self, iterator):
    """Implements one step of training.

    The implementer decides what a "step" consists of. If using distribution
    strategies, the call to this method should take place in the
    "cross-replica context" for generality, to allow e.g. multiple iterator
    dequeues and calls to `strategy.run`.

    Args:
      iterator: A tf.nest-compatible structure of tf.data Iterator or
        DistributedIterator.
    """

  def train_loop_end(self) -> Optional[Dict[Text, tf.Tensor]]:
    """Hook invoked at the end of the training loop.

    This is a good place to get metric results. Whatever this function
    returns is returned as-is from the train() method.

    Returns:
      Optionally, a dictionary of `Tensors`, which will be written to logs
      and as TensorBoard summaries.
    """

  @property
  def train_dataset(self):
    """Returns the train_dataset instance."""
    return self._train_dataset

  @train_dataset.setter
  def train_dataset(self, train_dataset):
    """Replaces the existing train dataset with a new one.

    Any unfinished work in the previous dataset will be discarded.

    Args:
      train_dataset: A tf.nest-compatible structure of tf.data.Dataset or
        DistributedDataset.
    """
    self._train_dataset = train_dataset
    # Drop the old iterator so the next `train()` call creates a new one.
    self._train_iter = None
@six.add_metaclass(abc.ABCMeta)
class StandardEvaluator(runner.AbstractEvaluator):
  """Implements the standard functionality of AbstractEvaluator APIs.

  Subclasses implement `eval_step`; `eval_begin`, `eval_reduce` and `eval_end`
  are optional hooks that do nothing by default.
  """

  def __init__(self, eval_dataset, use_tf_function=True):
    """Construct a `StandardEvaluator` object.

    Args:
      eval_dataset: A tf.nest-compatible structure of tf.data.Dataset or
        DistributedDataset.
      use_tf_function: A boolean indicates whether a `tf.function` will be used.
        If False, evaluation will run on pure eager mode.
    """
    self._eval_use_tf_function = use_tf_function
    self._eval_dataset = eval_dataset
    # Built lazily on the first `evaluate()` call and then reused.
    self._eval_loop_fn = None

  def evaluate(
      self, num_steps: Optional[tf.Tensor]) -> Optional[Dict[Text, tf.Tensor]]:
    """See base class."""
    outputs = self.eval_begin()  # pylint: disable=assignment-from-no-return

    # Iterators are recreated on every call. The dataset is read through the
    # public property so that subclasses overriding `eval_dataset` are honored.
    eval_iter = tf.nest.map_structure(iter, self.eval_dataset)

    if self._eval_loop_fn is None:
      eval_fn = self.eval_step
      if self._eval_use_tf_function:
        eval_fn = tf.function(eval_fn)
      self._eval_loop_fn = utils.create_loop_fn(eval_fn)

    outputs = self._eval_loop_fn(
        eval_iter, num_steps, state=outputs, reduce_fn=self.eval_reduce)
    # `eval_end` receives the final state only when the loop produced one.
    if outputs is None:
      return self.eval_end()
    else:
      return self.eval_end(outputs)

  def eval_begin(self) -> Any:
    """Called once at the beginning of the evaluation.

    This method is called before dataset iterators creation.
    This is a good place to reset metrics that accumulate values over the entire
    evaluation.

    Returns:
      An output which is passed as `state` argument into `eval_reduce` function.
    """
    pass

  @abc.abstractmethod
  def eval_step(self, iterator) -> Any:
    """Implements one step of evaluation.

    What a "step" consists of is up to the implementer. If using distribution
    strategies, the call to this method should take place in the "cross-replica
    context" for generality, to allow e.g. multiple iterator dequeues and calls
    to `strategy.run`.

    Args:
      iterator: A tf.nest-compatible structure of tf.data Iterator or
        DistributedIterator.

    Returns:
      An output which is passed as `step_outputs` argument into `eval_reduce`
      function.
    """
    pass

  def eval_end(self, *args) -> Optional[Dict[Text, tf.Tensor]]:
    """Called at the end of the evaluation.

    This is a good place to get metric results. The value returned from this
    function will be returned as-is from the evaluate() method.

    Args:
      *args: the outputs from `eval_reduce` for the last eval step. Nothing is
        passed when the evaluation loop returned `None`.

    Returns:
      The function may return a dictionary of `Tensors`, which will be
      written to logs and as TensorBoard summaries.
    """
    pass

  def eval_reduce(self, state=None, step_outputs=None) -> Any:
    """A function to do the reduction on the evaluation outputs per step.

    This is useful for passing states throughout evaluation. E.g. it can be used
    to maintain the output losses from all the evaluation steps, and compute the
    mean loss in `eval_end` function.

    Args:
      state: A maintained state throughout the evaluation.
      step_outputs: Outputs from the current evaluation step.

    Returns:
      An output which is passed as `state` argument into `eval_reduce` function
      for the next step. After evaluation is finished, the output from last step
      will be passed into `eval_end` function.
    """
    pass

  @property
  def eval_dataset(self):
    """Returns the eval_dataset instance."""
    return self._eval_dataset

  @eval_dataset.setter
  def eval_dataset(self, eval_dataset):
    """Set a new eval dataset and replace with the existing one.

    Args:
      eval_dataset: A tf.nest-compatible structure of tf.data.Dataset or
        DistributedDataset.
    """
    self._eval_dataset = eval_dataset
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment