"magic_pdf/vscode:/vscode.git/clone" did not exist on "5b86ca82feb58b327affc04bb550aaf9f5a33bb0"
Commit 356c98bd authored by Kaushik Shivakumar

Merge remote-tracking branch 'upstream/master' into detr-push-3

parents d31aba8a b9785623
@@ -17,12 +17,15 @@
 import collections
 import json
 import os
 from absl import logging
 import dataclasses
+import orbit
 import tensorflow as tf
 import tensorflow_hub as hub
 from official.core import base_task
+from official.core import task_factory
 from official.modeling.hyperparams import base_config
 from official.modeling.hyperparams import config_definitions as cfg
 from official.nlp.bert import squad_evaluate_v1_1
@@ -39,8 +42,7 @@ from official.nlp.tasks import utils
 @dataclasses.dataclass
 class ModelConfig(base_config.Config):
   """A base span labeler configuration."""
-  encoder: encoders.TransformerEncoderConfig = (
-      encoders.TransformerEncoderConfig())
+  encoder: encoders.EncoderConfig = encoders.EncoderConfig()


 @dataclasses.dataclass
@@ -57,7 +59,7 @@ class QuestionAnsweringConfig(cfg.TaskConfig):
   validation_data: cfg.DataConfig = cfg.DataConfig()


-@base_task.register_task_cls(QuestionAnsweringConfig)
+@task_factory.register_task_cls(QuestionAnsweringConfig)
 class QuestionAnsweringTask(base_task.Task):
   """Task object for question answering."""
@@ -83,17 +85,21 @@ class QuestionAnsweringTask(base_task.Task):
       self._tf_record_input_path, self._eval_examples, self._eval_features = (
           self._preprocess_eval_data(params.validation_data))

+  def set_preprocessed_eval_input_path(self, eval_input_path):
+    """Sets the path to the preprocessed eval data."""
+    self._tf_record_input_path = eval_input_path
+
   def build_model(self):
     if self._hub_module:
       encoder_network = utils.get_encoder_from_hub(self._hub_module)
     else:
-      encoder_network = encoders.instantiate_encoder_from_cfg(
-          self.task_config.model.encoder)
+      encoder_network = encoders.build_encoder(self.task_config.model.encoder)
+    encoder_cfg = self.task_config.model.encoder.get()
     # Currently, we only supports bert-style question answering finetuning.
     return models.BertSpanLabeler(
         network=encoder_network,
         initializer=tf.keras.initializers.TruncatedNormal(
-            stddev=self.task_config.model.encoder.initializer_range))
+            stddev=encoder_cfg.initializer_range))

   def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
     start_positions = labels['start_positions']
@@ -241,10 +247,6 @@ class QuestionAnsweringTask(base_task.Task):
                                  step_outputs['end_logits']):
       u_ids, s_logits, e_logits = (
           unique_ids.numpy(), start_logits.numpy(), end_logits.numpy())
-      if u_ids.size == 1:
-        u_ids = [u_ids]
-        s_logits = [s_logits]
-        e_logits = [e_logits]
       for values in zip(u_ids, s_logits, e_logits):
         state.append(self.raw_aggregated_result(
             unique_id=values[0],
@@ -291,16 +293,45 @@ class QuestionAnsweringTask(base_task.Task):
             'final_f1': eval_metrics['final_f1']}
     return eval_metrics

-  def initialize(self, model):
-    """Load a pretrained checkpoint (if exists) and then train from iter 0."""
-    ckpt_dir_or_file = self.task_config.init_checkpoint
-    if tf.io.gfile.isdir(ckpt_dir_or_file):
-      ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
-    if not ckpt_dir_or_file:
-      return
-
-    ckpt = tf.train.Checkpoint(**model.checkpoint_items)
-    status = ckpt.read(ckpt_dir_or_file)
-    status.expect_partial().assert_existing_objects_matched()
-    logging.info('Finished loading pretrained checkpoint from %s',
-                 ckpt_dir_or_file)
+
+def predict(task: QuestionAnsweringTask, params: cfg.DataConfig,
+            model: tf.keras.Model):
+  """Predicts on the input data.
+
+  Args:
+    task: A `QuestionAnsweringTask` object.
+    params: A `cfg.DataConfig` object.
+    model: A keras.Model.
+
+  Returns:
+    A tuple of `all_predictions`, `all_nbest` and `scores_diff`, which
+      are dict and can be written to json files including prediction json file,
+      nbest json file and null_odds json file.
+  """
+  tf_record_input_path, eval_examples, eval_features = (
+      task._preprocess_eval_data(params))  # pylint: disable=protected-access
+
+  # `tf_record_input_path` will overwrite `params.input_path`,
+  # when `task.buid_inputs()` is called.
+  task.set_preprocessed_eval_input_path(tf_record_input_path)
+
+  def predict_step(inputs):
+    """Replicated prediction calculation."""
+    return task.validation_step(inputs, model)
+
+  dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(),
+                                                 task.build_inputs, params)
+  aggregated_outputs = utils.predict(predict_step, task.aggregate_logs, dataset)
+
+  all_predictions, all_nbest, scores_diff = (
+      task.squad_lib.postprocess_output(
+          eval_examples,
+          eval_features,
+          aggregated_outputs,
+          task.task_config.n_best_size,
+          task.task_config.max_answer_length,
+          task.task_config.validation_data.do_lower_case,
+          version_2_with_negative=(params.version_2_with_negative),
+          null_score_diff_threshold=task.task_config.null_score_diff_threshold,
+          verbose=False))
+  return all_predictions, all_nbest, scores_diff
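
Aside: the new module-level `predict` above is exercised by the `test_predict` case added in the test file below. A minimal usage sketch, assuming `config` is an already-populated `QuestionAnsweringConfig` (nothing here is a library default):

  from official.nlp.tasks import question_answering

  task = question_answering.QuestionAnsweringTask(config)
  model = task.build_model()
  # Returns dicts that can be dumped to prediction/nbest/null_odds json files.
  all_predictions, all_nbest, scores_diff = question_answering.predict(
      task, config.validation_data, model)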
@@ -25,6 +25,7 @@ from official.nlp.bert import export_tfhub
 from official.nlp.configs import bert
 from official.nlp.configs import encoders
 from official.nlp.data import question_answering_dataloader
+from official.nlp.tasks import masked_lm
 from official.nlp.tasks import question_answering
@@ -32,21 +33,37 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
   def setUp(self):
     super(QuestionAnsweringTaskTest, self).setUp()
-    self._encoder_config = encoders.TransformerEncoderConfig(
-        vocab_size=30522, num_layers=1)
+    self._encoder_config = encoders.EncoderConfig(
+        bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1))
     self._train_data_config = question_answering_dataloader.QADataConfig(
-        input_path="dummy",
-        seq_length=128,
-        global_batch_size=1)
-
-    val_data = {"version": "1.1",
-                "data": [{"paragraphs": [
-                    {"context": "Sky is blue.",
-                     "qas": [{"question": "What is blue?", "id": "1234",
-                              "answers": [{"text": "Sky", "answer_start": 0},
-                                          {"text": "Sky", "answer_start": 0},
-                                          {"text": "Sky", "answer_start": 0}]
-                              }]}]}]}
+        input_path="dummy", seq_length=128, global_batch_size=1)
+
+    val_data = {
+        "version":
+            "1.1",
+        "data": [{
+            "paragraphs": [{
+                "context":
+                    "Sky is blue.",
+                "qas": [{
+                    "question":
+                        "What is blue?",
+                    "id":
+                        "1234",
+                    "answers": [{
+                        "text": "Sky",
+                        "answer_start": 0
+                    }, {
+                        "text": "Sky",
+                        "answer_start": 0
+                    }, {
+                        "text": "Sky",
+                        "answer_start": 0
+                    }]
+                }]
+            }]
+        }]
+    }
     self._val_input_path = os.path.join(self.get_temp_dir(), "val_data.json")
     with tf.io.gfile.GFile(self._val_input_path, "w") as writer:
       writer.write(json.dumps(val_data, indent=4) + "\n")
@@ -81,23 +98,26 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
     val_dataset = task.build_inputs(config.validation_data)
     val_iterator = iter(val_dataset)
     logs = task.validation_step(next(val_iterator), model, metrics=metrics)
+    # Mock that `logs` is from one replica.
+    logs = {x: (logs[x],) for x in logs}
     logs = task.aggregate_logs(step_outputs=logs)
     metrics = task.reduce_aggregated_logs(logs)
     self.assertIn("final_f1", metrics)

-  @parameterized.parameters(itertools.product(
-      (False, True),
-      ("WordPiece", "SentencePiece"),
-  ))
+  @parameterized.parameters(
+      itertools.product(
+          (False, True),
+          ("WordPiece", "SentencePiece"),
+      ))
   def test_task(self, version_2_with_negative, tokenization):
     # Saves a checkpoint.
-    pretrain_cfg = bert.BertPretrainerConfig(
+    pretrain_cfg = bert.PretrainerConfig(
         encoder=self._encoder_config,
         cls_heads=[
             bert.ClsHeadConfig(
                 inner_dim=10, num_classes=3, name="next_sentence")
         ])
-    pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg)
+    pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg)
     ckpt = tf.train.Checkpoint(
         model=pretrain_model, **pretrain_model.checkpoint_items)
     saved_path = ckpt.save(self.get_temp_dir())
@@ -160,6 +180,27 @@ class QuestionAnsweringTaskTest(tf.test.TestCase, parameterized.TestCase):
         validation_data=self._get_validation_data_config())
     self._run_task(config)

+  @parameterized.named_parameters(("squad1", False), ("squad2", True))
+  def test_predict(self, version_2_with_negative):
+    validation_data = self._get_validation_data_config(
+        version_2_with_negative=version_2_with_negative)
+
+    config = question_answering.QuestionAnsweringConfig(
+        model=question_answering.ModelConfig(encoder=self._encoder_config),
+        train_data=self._train_data_config,
+        validation_data=validation_data)
+    task = question_answering.QuestionAnsweringTask(config)
+    model = task.build_model()
+
+    all_predictions, all_nbest, scores_diff = question_answering.predict(
+        task, validation_data, model)
+    self.assertLen(all_predictions, 1)
+    self.assertLen(all_nbest, 1)
+    if version_2_with_negative:
+      self.assertLen(scores_diff, 1)
+    else:
+      self.assertEmpty(scores_diff)
+

 if __name__ == "__main__":
   tf.test.main()
@@ -26,6 +26,7 @@ import tensorflow as tf
 import tensorflow_hub as hub
 from official.core import base_task
+from official.core import task_factory
 from official.modeling.hyperparams import base_config
 from official.modeling.hyperparams import config_definitions as cfg
 from official.nlp.configs import encoders
@@ -43,8 +44,7 @@ class ModelConfig(base_config.Config):
   """A classifier/regressor configuration."""
   num_classes: int = 0
   use_encoder_pooler: bool = False
-  encoder: encoders.TransformerEncoderConfig = (
-      encoders.TransformerEncoderConfig())
+  encoder: encoders.EncoderConfig = encoders.EncoderConfig()


 @dataclasses.dataclass
@@ -62,7 +62,7 @@ class SentencePredictionConfig(cfg.TaskConfig):
   validation_data: cfg.DataConfig = cfg.DataConfig()


-@base_task.register_task_cls(SentencePredictionConfig)
+@task_factory.register_task_cls(SentencePredictionConfig)
 class SentencePredictionTask(base_task.Task):
   """Task object for sentence_prediction."""
@@ -84,15 +84,14 @@ class SentencePredictionTask(base_task.Task):
     if self._hub_module:
       encoder_network = utils.get_encoder_from_hub(self._hub_module)
     else:
-      encoder_network = encoders.instantiate_encoder_from_cfg(
-          self.task_config.model.encoder)
+      encoder_network = encoders.build_encoder(self.task_config.model.encoder)
+    encoder_cfg = self.task_config.model.encoder.get()
     # Currently, we only support bert-style sentence prediction finetuning.
     return models.BertClassifier(
         network=encoder_network,
         num_classes=self.task_config.model.num_classes,
         initializer=tf.keras.initializers.TruncatedNormal(
-            stddev=self.task_config.model.encoder.initializer_range),
+            stddev=encoder_cfg.initializer_range),
         use_encoder_pooler=self.task_config.model.use_encoder_pooler)

   def build_losses(self, labels, model_outputs, aux_losses=None) -> tf.Tensor:
@@ -244,11 +243,7 @@ def predict(task: SentencePredictionTask, params: cfg.DataConfig,
   """
   is_regression = task.task_config.model.num_classes == 1

-  @tf.function
-  def predict_step(iterator):
-    """Predicts on distributed devices."""
-
-    def _replicated_step(inputs):
-      """Replicated prediction calculation."""
+  def predict_step(inputs):
+    """Replicated prediction calculation."""
     x, _ = inputs
     outputs = task.inference_step(x, model)
@@ -257,21 +252,16 @@ def predict(task: SentencePredictionTask, params: cfg.DataConfig,
     else:
       return tf.argmax(outputs, axis=-1)

-    outputs = tf.distribute.get_strategy().run(
-        _replicated_step, args=(next(iterator),))
-    return tf.nest.map_structure(
-        tf.distribute.get_strategy().experimental_local_results, outputs)
-
-  def reduce_fn(state, outputs):
+  def aggregate_fn(state, outputs):
     """Concatenates model's outputs."""
+    if state is None:
+      state = {'predictions': []}
+
     for per_replica_batch_predictions in outputs:
-      state.extend(per_replica_batch_predictions)
+      state['predictions'].extend(per_replica_batch_predictions)
     return state

-  loop_fn = orbit.utils.create_loop_fn(predict_step)
   dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(),
                                                  task.build_inputs, params)
-  # Set `num_steps` to -1 to exhaust the dataset.
-  predictions = loop_fn(
-      iter(dataset), num_steps=-1, state=[], reduce_fn=reduce_fn)
-  return predictions
+  outputs = utils.predict(predict_step, aggregate_fn, dataset)
+  return outputs['predictions']
@@ -26,6 +26,7 @@ from official.nlp.bert import export_tfhub
 from official.nlp.configs import bert
 from official.nlp.configs import encoders
 from official.nlp.data import sentence_prediction_dataloader
+from official.nlp.tasks import masked_lm
 from official.nlp.tasks import sentence_prediction
@@ -68,8 +69,8 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
   def get_model_config(self, num_classes):
     return sentence_prediction.ModelConfig(
-        encoder=encoders.TransformerEncoderConfig(
-            vocab_size=30522, num_layers=1),
+        encoder=encoders.EncoderConfig(
+            bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)),
         num_classes=num_classes)

   def _run_task(self, config):
@@ -102,14 +103,14 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
     task.validation_step(next(iterator), model, metrics=metrics)

     # Saves a checkpoint.
-    pretrain_cfg = bert.BertPretrainerConfig(
-        encoder=encoders.TransformerEncoderConfig(
-            vocab_size=30522, num_layers=1),
+    pretrain_cfg = bert.PretrainerConfig(
+        encoder=encoders.EncoderConfig(
+            bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1)),
         cls_heads=[
             bert.ClsHeadConfig(
                 inner_dim=10, num_classes=3, name="next_sentence")
         ])
-    pretrain_model = bert.instantiate_pretrainer_from_cfg(pretrain_cfg)
+    pretrain_model = masked_lm.MaskedLMTask(None).build_model(pretrain_cfg)
     ckpt = tf.train.Checkpoint(
         model=pretrain_model, **pretrain_model.checkpoint_items)
     ckpt.save(config.init_checkpoint)
@@ -136,8 +137,8 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
     if num_classes == 1:
       self.assertIsInstance(metrics[0], tf.keras.metrics.MeanSquaredError)
     else:
-      self.assertIsInstance(
-          metrics[0], tf.keras.metrics.SparseCategoricalAccuracy)
+      self.assertIsInstance(metrics[0],
+                            tf.keras.metrics.SparseCategoricalAccuracy)

     dataset = task.build_inputs(config.train_data)
     iterator = iter(dataset)
@@ -147,9 +148,9 @@ class SentencePredictionTaskTest(tf.test.TestCase, parameterized.TestCase):
     logs = task.validation_step(next(iterator), model, metrics=metrics)
     loss = logs["loss"].numpy()
     if num_classes == 1:
-      self.assertAlmostEqual(loss, 42.77483, places=3)
+      self.assertGreater(loss, 1.0)
     else:
-      self.assertAlmostEqual(loss, 3.57627e-6, places=3)
+      self.assertLess(loss, 1.0)

   @parameterized.parameters(("matthews_corrcoef", 2),
                             ("pearson_spearman_corr", 1))
......
@@ -14,7 +14,6 @@
 # limitations under the License.
 # ==============================================================================
 """Tagging (e.g., NER/POS) task."""
-import logging
 from typing import List, Optional, Tuple

 import dataclasses
@@ -26,6 +25,7 @@ import tensorflow as tf
 import tensorflow_hub as hub
 from official.core import base_task
+from official.core import task_factory
 from official.modeling.hyperparams import base_config
 from official.modeling.hyperparams import config_definitions as cfg
 from official.nlp.configs import encoders
@@ -37,8 +37,7 @@ from official.nlp.tasks import utils
 @dataclasses.dataclass
 class ModelConfig(base_config.Config):
   """A base span labeler configuration."""
-  encoder: encoders.TransformerEncoderConfig = (
-      encoders.TransformerEncoderConfig())
+  encoder: encoders.EncoderConfig = encoders.EncoderConfig()
   head_dropout: float = 0.1
   head_initializer_range: float = 0.02
@@ -81,7 +80,7 @@ def _masked_labels_and_weights(y_true):
   return masked_y_true, tf.cast(mask, tf.float32)


-@base_task.register_task_cls(TaggingConfig)
+@task_factory.register_task_cls(TaggingConfig)
 class TaggingTask(base_task.Task):
   """Task object for tagging (e.g., NER or POS)."""
@@ -102,8 +101,7 @@ class TaggingTask(base_task.Task):
     if self._hub_module:
       encoder_network = utils.get_encoder_from_hub(self._hub_module)
     else:
-      encoder_network = encoders.instantiate_encoder_from_cfg(
-          self.task_config.model.encoder)
+      encoder_network = encoders.build_encoder(self.task_config.model.encoder)

     return models.BertTokenClassifier(
         network=encoder_network,
@@ -215,20 +213,6 @@ class TaggingTask(base_task.Task):
             seqeval_metrics.accuracy_score(label_class, predict_class),
     }

-  def initialize(self, model):
-    """Load a pretrained checkpoint (if exists) and then train from iter 0."""
-    ckpt_dir_or_file = self.task_config.init_checkpoint
-    if tf.io.gfile.isdir(ckpt_dir_or_file):
-      ckpt_dir_or_file = tf.train.latest_checkpoint(ckpt_dir_or_file)
-    if not ckpt_dir_or_file:
-      return
-
-    ckpt = tf.train.Checkpoint(**model.checkpoint_items)
-    status = ckpt.restore(ckpt_dir_or_file)
-    status.expect_partial().assert_existing_objects_matched()
-    logging.info('Finished loading pretrained checkpoint from %s',
-                 ckpt_dir_or_file)
-

 def predict(task: TaggingTask, params: cfg.DataConfig,
             model: tf.keras.Model) -> Tuple[List[List[int]], List[int]]:
@@ -246,11 +230,7 @@ def predict(task: TaggingTask, params: cfg.DataConfig,
     sentence id of the corresponding example.
   """

-  @tf.function
-  def predict_step(iterator):
-    """Predicts on distributed devices."""
-
-    def _replicated_step(inputs):
-      """Replicated prediction calculation."""
+  def predict_step(inputs):
+    """Replicated prediction calculation."""
     x, y = inputs
     sentence_ids = x.pop('sentence_id')
@@ -262,14 +242,13 @@ def predict(task: TaggingTask, params: cfg.DataConfig,
         label_mask=label_mask,
         sentence_ids=sentence_ids)

-    outputs = tf.distribute.get_strategy().run(
-        _replicated_step, args=(next(iterator),))
-    return tf.nest.map_structure(
-        tf.distribute.get_strategy().experimental_local_results, outputs)
-
-  def reduce_fn(state, outputs):
+  def aggregate_fn(state, outputs):
     """Concatenates model's outputs."""
-    cur_predict_ids, cur_sentence_ids = state
+    if state is None:
+      state = {'predict_ids': [], 'sentence_ids': []}
+
+    cur_predict_ids = state['predict_ids']
+    cur_sentence_ids = state['sentence_ids']
     for batch_predict_ids, batch_label_mask, batch_sentence_ids in zip(
         outputs['predict_ids'], outputs['label_mask'],
         outputs['sentence_ids']):
@@ -283,12 +262,9 @@ def predict(task: TaggingTask, params: cfg.DataConfig,
           # Skip the padding label.
           if tmp_label_mask[i]:
             cur_predict_ids[-1].append(tmp_predict_ids[i])
-    return cur_predict_ids, cur_sentence_ids
+    return state

-  loop_fn = orbit.utils.create_loop_fn(predict_step)
   dataset = orbit.utils.make_distributed_dataset(tf.distribute.get_strategy(),
                                                  task.build_inputs, params)
-  # Set `num_steps` to -1 to exhaust the dataset.
-  predict_ids, sentence_ids = loop_fn(
-      iter(dataset), num_steps=-1, state=([], []), reduce_fn=reduce_fn)
-  return predict_ids, sentence_ids
+  outputs = utils.predict(predict_step, aggregate_fn, dataset)
+  return outputs['predict_ids'], outputs['sentence_ids']
@@ -53,8 +53,8 @@ class TaggingTest(tf.test.TestCase):
   def setUp(self):
     super(TaggingTest, self).setUp()
-    self._encoder_config = encoders.TransformerEncoderConfig(
-        vocab_size=30522, num_layers=1)
+    self._encoder_config = encoders.EncoderConfig(
+        bert=encoders.BertEncoderConfig(vocab_size=30522, num_layers=1))
     self._train_data_config = tagging_data_loader.TaggingDataConfig(
         input_path="dummy", seq_length=128, global_batch_size=1)
@@ -74,7 +74,7 @@ class TaggingTest(tf.test.TestCase):
   def test_task(self):
     # Saves a checkpoint.
-    encoder = encoders.instantiate_encoder_from_cfg(self._encoder_config)
+    encoder = encoders.build_encoder(self._encoder_config)
     ckpt = tf.train.Checkpoint(encoder=encoder)
     saved_path = ckpt.save(self.get_temp_dir())
......
@@ -14,6 +14,9 @@
 # limitations under the License.
 # ==============================================================================
 """Common utils for tasks."""
+from typing import Any, Callable
+
+import orbit
 import tensorflow as tf
 import tensorflow_hub as hub
@@ -32,3 +35,34 @@ def get_encoder_from_hub(hub_module: str) -> tf.keras.Model:
   return tf.keras.Model(
       inputs=[input_word_ids, input_mask, input_type_ids],
       outputs=[sequence_output, pooled_output])
+
+
+def predict(predict_step_fn: Callable[[Any], Any],
+            aggregate_fn: Callable[[Any, Any], Any],
+            dataset: tf.data.Dataset):
+  """Runs prediction.
+
+  Args:
+    predict_step_fn: A callable such as `def predict_step(inputs)`, where
+      `inputs` are input tensors.
+    aggregate_fn: A callable such as `def aggregate_fn(state, value)`, where
+      `value` is the outputs from `predict_step_fn`.
+    dataset: A `tf.data.Dataset` object.
+
+  Returns:
+    The aggregated predictions.
+  """
+
+  @tf.function
+  def predict_step(iterator):
+    """Predicts on distributed devices."""
+    outputs = tf.distribute.get_strategy().run(
+        predict_step_fn, args=(next(iterator),))
+    return tf.nest.map_structure(
+        tf.distribute.get_strategy().experimental_local_results, outputs)
+
+  loop_fn = orbit.utils.create_loop_fn(predict_step)
+  # Set `num_steps` to -1 to exhaust the dataset.
+  outputs = loop_fn(
+      iter(dataset), num_steps=-1, state=None, reduce_fn=aggregate_fn)  # pytype: disable=wrong-arg-types
+  return outputs
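
Aside: the refactored task-level `predict` helpers above all route through this new `utils.predict`. A toy sketch of the `predict_step_fn` / `aggregate_fn` contract, with a stand-in model and dataset (nothing here is library code except `utils.predict` itself; the real callers build their dataset with `orbit.utils.make_distributed_dataset`):

  import tensorflow as tf
  from official.nlp.tasks import utils

  toy_model = tf.keras.Sequential([tf.keras.layers.Dense(2)])
  toy_dataset = tf.data.Dataset.from_tensor_slices(
      tf.random.uniform([8, 4])).batch(2)

  def predict_step(inputs):
    # Runs under the current distribution strategy, once per replica.
    return tf.argmax(toy_model(inputs), axis=-1)

  def aggregate_fn(state, outputs):
    # `state` is None on the first call; `outputs` holds the per-replica
    # results already unwrapped by `utils.predict`.
    if state is None:
      state = {'predictions': []}
    for per_replica in outputs:
      state['predictions'].extend(per_replica.numpy())
    return state

  results = utils.predict(predict_step, aggregate_fn, toy_dataset)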
@@ -26,7 +26,7 @@ import re
 import sys
 import unicodedata

-from absl import app as absl_app
+from absl import app
 from absl import flags
 import six
 from six.moves import range
@@ -149,4 +149,4 @@ if __name__ == "__main__":
   tf.logging.set_verbosity(tf.logging.INFO)
   define_compute_bleu_flags()
   FLAGS = flags.FLAGS
-  absl_app.run(main)
+  app.run(main)
@@ -23,7 +23,7 @@ import random
 import tarfile

 # pylint: disable=g-bad-import-order
-from absl import app as absl_app
+from absl import app
 from absl import flags
 from absl import logging
 import six
@@ -436,4 +436,4 @@ if __name__ == "__main__":
   logging.set_verbosity(logging.INFO)
   define_data_download_flags()
   FLAGS = flags.FLAGS
-  absl_app.run(main)
+  app.run(main)
@@ -14,32 +14,14 @@
 # ==============================================================================
 """Keras layers of XLNet model in TF 2.0."""

-from __future__ import absolute_import
-from __future__ import division
-# from __future__ import google_type_annotations
-from __future__ import print_function
-
 import copy
-import numpy as np
 import tensorflow as tf

 from official.nlp.xlnet import data_utils


 def gelu(x):
-  """Gaussian Error Linear Unit.
-
-  This is a smoother version of the RELU.
-  Original paper: https://arxiv.org/abs/1606.08415
-  Args:
-    x: float Tensor to perform activation.
-
-  Returns:
-    `x` with the GELU activation applied.
-  """
-  cdf = 0.5 * (1.0 + tf.tanh(
-      (np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
-  return x * cdf
+  return tf.keras.activations.gelu(x, approximate=True)


 def rel_shift(x, klen=-1):
......
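
Aside: the hand-rolled tanh approximation deleted above and the Keras built-in that replaces it compute the same function. A quick numerical check (standalone, not library code; the two results should agree to float32 precision):

  import numpy as np
  import tensorflow as tf

  x = tf.constant(np.linspace(-3.0, 3.0, 7), dtype=tf.float32)
  # Old formula from the deleted gelu() body.
  cdf = 0.5 * (1.0 + tf.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))
  old_gelu = x * cdf
  new_gelu = tf.keras.activations.gelu(x, approximate=True)
  print(np.max(np.abs(old_gelu.numpy() - new_gelu.numpy())))  # expected ~0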
@@ -20,7 +20,7 @@ import sys
 from setuptools import find_packages
 from setuptools import setup

-version = '2.2.0'
+version = '2.3.0'

 project_name = 'tf-models-official'
@@ -60,7 +60,7 @@ if project_name == 'tf-models-nightly':
   version += '.dev' + datetime.datetime.now().strftime('%Y%m%d')
   install_requires.append('tf-nightly')
 else:
-  install_requires.append('tensorflow>=2.2.0')
+  install_requires.append('tensorflow>=2.3.0')

 print('install_requires: ', install_requires)
 print('dependency_links: ', dependency_links)
......
@@ -32,7 +32,8 @@ from official.recommendation import movielens
 def create_dataset_from_tf_record_files(input_file_pattern,
                                         pre_batch_size,
                                         batch_size,
-                                        is_training=True):
+                                        is_training=True,
+                                        rebatch=False):
   """Creates dataset from (tf)records files for training/evaluation."""
   if pre_batch_size != batch_size:
     raise ValueError("Pre-batch ({}) size is not equal to batch "
@@ -51,6 +52,12 @@ def create_dataset_from_tf_record_files(input_file_pattern,
   dataset = dataset.map(
       decode_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)

+  if rebatch:
+    # A workaround for TPU Pod evaluation dataset.
+    # TODO (b/162341937) remove once it's fixed.
+    dataset = dataset.unbatch()
+    dataset = dataset.batch(pre_batch_size)
+
   dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
   return dataset
@@ -151,12 +158,18 @@ def create_ncf_input_data(params,
       params["train_dataset_path"],
       input_meta_data["train_prebatch_size"],
       params["batch_size"],
-      is_training=True)
+      is_training=True,
+      rebatch=False)
+
+  # Re-batch evaluation dataset for TPU Pods.
+  # TODO (b/162341937) remove once it's fixed.
+  eval_rebatch = (params["use_tpu"] and strategy.num_replicas_in_sync > 8)
   eval_dataset = create_dataset_from_tf_record_files(
       params["eval_dataset_path"],
       input_meta_data["eval_prebatch_size"],
       params["eval_batch_size"],
-      is_training=False)
+      is_training=False,
+      rebatch=eval_rebatch)

   num_train_steps = int(input_meta_data["num_train_steps"])
   num_eval_steps = int(input_meta_data["num_eval_steps"])
......
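
Aside: a toy illustration of the mechanics of the unbatch/re-batch workaround above. The file-level batches are flattened back to individual examples and re-batched to a uniform `pre_batch_size`; the underlying TPU Pod evaluation issue itself is tracked in b/162341937 and is not reproduced here (toy data only, not NCF records):

  import tensorflow as tf

  ds = tf.data.Dataset.range(10).batch(4)  # pre-batched as written to disk
  ds = ds.unbatch().batch(4)               # rebuilt from single examples
  for batch in ds:
    print(batch.shape)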
@@ -235,6 +235,7 @@ def run_ncf(_):
   params = ncf_common.parse_flags(FLAGS)
   params["distribute_strategy"] = strategy
+  params["use_tpu"] = (FLAGS.distribution_strategy == "tpu")

   if params["use_tpu"] and not params["keras_use_ctl"]:
     logging.error("Custom training loop must be used when using TPUStrategy.")
@@ -491,7 +492,8 @@ def run_ncf_custom_training(params,
     logging.info("Done training epoch %s, epoch loss=%.3f", epoch + 1,
                  train_loss)

-    eval_input_iterator = iter(eval_input_dataset)
+    eval_input_iterator = iter(
+        strategy.experimental_distribute_dataset(eval_input_dataset))

     hr_sum = 0.0
     hr_count = 0.0
......
@@ -3,7 +3,7 @@ google-api-python-client>=1.6.7
 google-cloud-bigquery>=0.31.0
 kaggle>=1.3.9
 numpy>=1.15.4
-oauth2client>=4.1.2
+oauth2client
 pandas>=0.22.0
 psutil>=5.4.3
 py-cpuinfo>=3.3.0
@@ -21,7 +21,7 @@ pyyaml
 # CV related dependencies
 opencv-python-headless
 Pillow
--e git+https://github.com/cocodataset/cocoapi#egg=pycocotools&subdirectory=PythonAPI
+pycocotools
 # NLP related dependencies
 seqeval
 sentencepiece
@@ -26,6 +26,12 @@ from absl import logging
 import tensorflow as tf

+from tensorflow.python.eager import monitoring
+
+global_batch_size_gauge = monitoring.IntGauge(
+    '/tensorflow/training/global_batch_size', 'TF training global batch size')
+

 class BatchTimestamp(object):
   """A structure to store batch time stamp."""
@@ -60,6 +66,8 @@ class TimeHistory(tf.keras.callbacks.Callback):
     self.steps_in_epoch = 0
     self.start_time = None

+    global_batch_size_gauge.get_cell().set(batch_size)
+
     if logdir:
       self.summary_writer = tf.summary.create_file_writer(logdir)
     else:
......
# Lint as: python3
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Mock task for testing."""
import dataclasses
import numpy as np
import tensorflow as tf
from official.core import base_task
from official.core import exp_factory
from official.core import task_factory
from official.modeling.hyperparams import config_definitions as cfg
class MockModel(tf.keras.Model):
def __init__(self, network):
super().__init__()
self.network = network
def call(self, inputs):
outputs = self.network(inputs)
self.add_loss(tf.reduce_mean(outputs))
return outputs
@dataclasses.dataclass
class MockTaskConfig(cfg.TaskConfig):
pass
@task_factory.register_task_cls(MockTaskConfig)
class MockTask(base_task.Task):
"""Mock task object for testing."""
def __init__(self, params=None, logging_dir=None):
super().__init__(params=params, logging_dir=logging_dir)
def build_model(self, *arg, **kwargs):
inputs = tf.keras.layers.Input(shape=(2,), name="random", dtype=tf.float32)
outputs = tf.keras.layers.Dense(1)(inputs)
network = tf.keras.Model(inputs=inputs, outputs=outputs)
return MockModel(network)
def build_metrics(self, training: bool = True):
del training
return [tf.keras.metrics.Accuracy(name="acc")]
def build_inputs(self, params):
def generate_data(_):
x = tf.zeros(shape=(2,), dtype=tf.float32)
label = tf.zeros([1], dtype=tf.int32)
return x, label
dataset = tf.data.Dataset.range(1)
dataset = dataset.repeat()
dataset = dataset.map(
generate_data, num_parallel_calls=tf.data.experimental.AUTOTUNE)
return dataset.prefetch(buffer_size=1).batch(2, drop_remainder=True)
def aggregate_logs(self, state, step_outputs):
if state is None:
state = {}
for key, value in step_outputs.items():
if key not in state:
state[key] = []
state[key].append(
np.concatenate([np.expand_dims(v.numpy(), axis=0) for v in value]))
return state
def reduce_aggregated_logs(self, aggregated_logs):
for k, v in aggregated_logs.items():
aggregated_logs[k] = np.sum(np.stack(v, axis=0))
return aggregated_logs
@exp_factory.register_config_factory("mock")
def mock_experiment() -> cfg.ExperimentConfig:
config = cfg.ExperimentConfig(
task=MockTaskConfig(), trainer=cfg.TrainerConfig())
return config
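
Aside: a minimal sketch of driving the new mock task directly. It touches only symbols defined in the file above and assumes the base `Task` accepts the `params=None` default used here:

  task = MockTask()
  model = task.build_model()
  dataset = task.build_inputs(params=None)
  features, labels = next(iter(dataset))
  outputs = model(features)              # MockModel also registers a mean loss
  metrics = task.build_metrics()
  experiment_config = mock_experiment()  # registered under the name "mock"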
@@ -52,7 +52,6 @@ MASKRCNN_CFG.override({
         'anchor_size': 8,
     },
     'rpn_head': {
-        'anchors_per_location': 3,
         'num_convs': 2,
         'num_filters': 256,
         'use_separable_conv': False,
......
@@ -39,7 +39,6 @@ RETINANET_CFG.override({
         'max_num_instances': 100,
     },
     'retinanet_head': {
-        'anchors_per_location': 9,
         'num_convs': 4,
         'num_filters': 256,
         'use_separable_conv': False,
......
@@ -62,7 +62,6 @@ SHAPEMASK_CFG.override({
         'upsample_factor': 4,
     },
     'retinanet_head': {
-        'anchors_per_location': 9,
         'num_convs': 4,
         'num_filters': 256,
         'use_separable_conv': False,
......
@@ -46,15 +46,15 @@ class Anchor(object):
       num_scales: integer number representing intermediate scales added
         on each level. For instances, num_scales=2 adds one additional
         intermediate anchor scales [2^0, 2^0.5] on each level.
-      aspect_ratios: list of float numbers representing the aspect raito anchors
+      aspect_ratios: list of float numbers representing the aspect ratio anchors
         added on each level. The number indicates the ratio of width to height.
         For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors on each
         scale level.
       anchor_size: float number representing the scale of size of the base
         anchor to the feature stride 2^level.
       image_size: a list of integer numbers or Tensors representing
-        [height, width] of the input image size.The image_size should be divided
-        by the largest feature stride 2^max_level.
+        [height, width] of the input image size.The image_size should be
+        divisible by the largest feature stride 2^max_level.
     """
     self.min_level = min_level
     self.max_level = max_level
@@ -77,8 +77,8 @@ class Anchor(object):
       for scale in range(self.num_scales):
         for aspect_ratio in self.aspect_ratios:
           stride = 2 ** level
-          intermidate_scale = 2 ** (scale / float(self.num_scales))
-          base_anchor_size = self.anchor_size * stride * intermidate_scale
+          intermediate_scale = 2 ** (scale / float(self.num_scales))
+          base_anchor_size = self.anchor_size * stride * intermediate_scale
           aspect_x = aspect_ratio ** 0.5
           aspect_y = aspect_ratio ** -0.5
           half_anchor_size_x = base_anchor_size * aspect_x / 2.0
......
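
Aside: a worked example of the corrected anchor-size arithmetic above, standalone rather than through the Anchor class; the level, num_scales, anchor_size and aspect ratios are illustrative values only:

  level, num_scales, anchor_size = 3, 2, 4.0
  for scale in range(num_scales):
    for aspect_ratio in [1.0, 2.0, 0.5]:
      stride = 2 ** level                                    # 8
      intermediate_scale = 2 ** (scale / float(num_scales))  # 1.0 or ~1.414
      base_anchor_size = anchor_size * stride * intermediate_scale
      aspect_x, aspect_y = aspect_ratio ** 0.5, aspect_ratio ** -0.5
      # Full anchor width/height (the class stores half-sizes around a center).
      width, height = base_anchor_size * aspect_x, base_anchor_size * aspect_y
      print(level, scale, aspect_ratio, round(width, 1), round(height, 1))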