"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "534e86b38db748fbdfd71e2bf6970968b39f8357"
Commit a52564cb authored by Hongkun Yu, committed by A. Unique TensorFlower

PerfZero XLNet classifier IMDB accuracy test on 8 GPUs.

PiperOrigin-RevId: 270817869
parent 08bb9eb5
@@ -26,7 +26,7 @@ import time
 # pylint: disable=g-bad-import-order
 from absl import flags
 from absl.testing import flagsaver
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 # pylint: enable=g-bad-import-order
 from official.benchmark import bert_benchmark_utils as benchmark_utils
...
@@ -100,12 +100,19 @@ class BertBenchmarkBase(tf.test.Benchmark):
     metrics = [{
         'name': 'training_loss',
         'value': stats['train_loss'],
-    }, {
-        'name':
-            'exp_per_second',
-        'value':
-            self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size)
     }]
+    if self.timer_callback:
+      metrics.append({
+          'name':
+              'exp_per_second',
+          'value':
+              self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size)
+      })
+    else:
+      metrics.append({
+          'name': 'exp_per_second',
+          'value': 0.0,
+      })
     if 'eval_metrics' in stats:
       metrics.append({
...
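With this change, benchmarks that set `self.timer_callback = None` (as the new XLNet IMDB accuracy test added below does) still report a well-formed metrics list: `exp_per_second` simply falls back to 0.0. A minimal sketch of the resulting list, in the `{'name': ..., 'value': ...}` format used throughout these benchmarks; the numeric values are made up for illustration:

```python
# Illustrative only: metrics reported when timer_callback is None.
metrics = [
    {'name': 'training_loss', 'value': 0.31},
    {'name': 'exp_per_second', 'value': 0.0},  # fallback added by this change
    {'name': 'eval_metrics', 'value': 0.96},   # appended only if present in stats
]
```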
@@ -25,7 +25,7 @@ import time
 # pylint: disable=g-bad-import-order
 from absl import flags
 from absl.testing import flagsaver
-import tensorflow.compat.v2 as tf
+import tensorflow as tf
 # pylint: enable=g-bad-import-order
 from official.benchmark import bert_benchmark_utils as benchmark_utils
...
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes XLNet benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import os
import time

# pylint: disable=g-bad-import-order
from absl import flags
from absl.testing import flagsaver
import tensorflow as tf
# pylint: enable=g-bad-import-order

from official.benchmark import bert_benchmark_utils as benchmark_utils
from official.nlp.xlnet import run_classifier

# pylint: disable=line-too-long
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/xlnet/large/xlnet_model-1'
CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/xlnet/imdb/spiece.model.len-512.train.tf_record'
CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/xlnet/imdb/spiece.model.len-512.dev.eval.tf_record'
# pylint: enable=line-too-long

FLAGS = flags.FLAGS


class XLNetClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
  """Base class to hold methods common to test classes in the module."""

  def __init__(self, output_dir=None):
    super(XLNetClassifyBenchmarkBase, self).__init__(output_dir)
    self.num_epochs = None
    self.num_steps_per_epoch = None

  @flagsaver.flagsaver
  def _run_xlnet_classifier(self):
    """Starts XLNet classification task."""
    run_classifier.main(unused_argv=None)


class XLNetClassifyAccuracy(XLNetClassifyBenchmarkBase):
  """Short accuracy test for XLNet model.

  Tests XLNet classification task model accuracy. The naming
  convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` format.
  """

  def __init__(self, output_dir=None, **kwargs):
    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

    super(XLNetClassifyAccuracy, self).__init__(output_dir=output_dir)

  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0.95,
                                max_accuracy=0.97):
    """Starts XLNet accuracy benchmark test."""
    start_time_sec = time.time()
    self._run_xlnet_classifier()
    wall_time_sec = time.time() - start_time_sec

    with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
      summary = json.loads(reader.read().decode('utf-8'))

    super(XLNetClassifyAccuracy, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def _setup(self):
    super(XLNetClassifyAccuracy, self)._setup()
    FLAGS.train_data_size = 25000
    FLAGS.test_data_size = 25024
    FLAGS.train_batch_size = 16
    FLAGS.seq_len = 512
    FLAGS.reuse_len = 256
    FLAGS.mem_len = 0
    FLAGS.n_layer = 24
    FLAGS.d_model = 1024
    FLAGS.d_embed = 1024
    FLAGS.n_head = 16
    FLAGS.d_head = 64
    FLAGS.d_inner = 4096
    FLAGS.untie_r = True
    FLAGS.n_class = 2
    FLAGS.ff_activation = 'gelu'
    FLAGS.strategy_type = 'mirror'
    FLAGS.learning_rate = 2e-5
    FLAGS.train_steps = 4000
    FLAGS.warmup_steps = 500
    FLAGS.iterations = 200
    FLAGS.bi_data = False
    FLAGS.init_checkpoint = self.pretrained_checkpoint_path
    FLAGS.train_tfrecord_path = self.train_data_path
    FLAGS.test_tfrecord_path = self.eval_data_path

  def benchmark_8_gpu_imdb(self):
    """Run XLNet model accuracy test with 8 GPUs."""
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_imdb')
    # Sets timer_callback to None as we do not use it now.
    self.timer_callback = None
    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
    self._run_and_report_benchmark(summary_path)


if __name__ == '__main__':
  tf.test.main()
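For reference, a minimal sketch of how the accuracy test above could be driven directly; in practice PerfZero discovers and invokes the `benchmark_*` methods itself, and the output directory here is purely illustrative:

```python
# Hypothetical direct invocation of the test defined above; assumes the
# XLNet flags have already been defined and parsed by absl, as they are
# when run under the benchmark harness.
benchmark = XLNetClassifyAccuracy(output_dir='/tmp/xlnet_imdb_benchmark')
benchmark.benchmark_8_gpu_imdb()
```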
@@ -72,7 +72,7 @@ def _steps_to_run(current_step, steps_per_epoch, steps_per_loop):
   return steps_per_loop


-def _write_txt_summary(training_summary, model_dir):
+def write_txt_summary(training_summary, model_dir):
   """Writes a summary text file to record stats."""
   summary_path = os.path.join(model_dir, _SUMMARY_TXT)
   with tf.io.gfile.GFile(summary_path, 'wb') as f:
@@ -415,6 +415,6 @@ def run_customized_training_loop(
                      train_metrics[0])
     training_summary['eval_metrics'] = _float_metric_value(eval_metrics[0])

-  _write_txt_summary(training_summary, model_dir)
+  write_txt_summary(training_summary, model_dir)

   return model
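Renaming `_write_txt_summary` to `write_txt_summary` makes the helper public so the XLNet training loop (see the `training_utils` hunk further down) can reuse it. Its body is truncated above; as a rough sketch of the behavior callers rely on, it JSON-encodes the summary dict into `_SUMMARY_TXT` (presumably `training_summary.txt`, which is what the XLNet benchmark reads back) under the model directory. This is an assumed reimplementation, not the exact source:

```python
import json
import os

import tensorflow as tf

_SUMMARY_TXT = 'training_summary.txt'  # assumed value of the module constant


def write_txt_summary(training_summary, model_dir):
  """Writes a JSON-encoded summary of training stats (illustrative sketch)."""
  summary_path = os.path.join(model_dir, _SUMMARY_TXT)
  with tf.io.gfile.GFile(summary_path, 'wb') as f:
    f.write(json.dumps(training_summary, indent=4))
```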
@@ -65,6 +65,8 @@ def run_evaluation(strategy,
       them when calculating the accuracy. For the reason that there will be
       dynamic-shape tensor, we first collect logits, labels and masks from TPU
       and calculate the accuracy via numpy locally.
+  Returns:
+    A float metric, accuracy.
   """

   def _test_step_fn(inputs):
@@ -108,12 +110,14 @@ def run_evaluation(strategy,
             np.argmax(merged_logits[real_index], axis=-1),
             merged_labels[real_index]))
     total += np.shape(real_index)[-1]
+  accuracy = float(correct) / float(total)
   logging.info("Train step: %d / acc = %d/%d = %f", step, correct, total,
-               float(correct) / float(total))
+               accuracy)
   if eval_summary_writer:
     with eval_summary_writer.as_default():
       tf.summary.scalar("eval_acc", float(correct) / float(total), step=step)
       eval_summary_writer.flush()
+  return accuracy


 def get_metric_fn():
@@ -191,7 +195,8 @@ def main(unused_argv):
       steps_per_loop=steps_per_loop,
       optimizer=optimizer,
       learning_rate_fn=learning_rate_fn,
-      model_dir=FLAGS.model_dir)
+      model_dir=FLAGS.model_dir,
+      save_steps=1000)


 if __name__ == "__main__":
...
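The evaluation loop above gathers logits, labels and padding masks from the accelerator and computes accuracy locally with numpy; the new `accuracy` variable and `return accuracy` expose that value to callers. A self-contained sketch of the masked-accuracy computation — the helper name and the mask convention (1 = real example, 0 = padding) are assumptions for illustration, not the exact code in `run_classifier`:

```python
import numpy as np


def masked_accuracy(merged_logits, merged_labels, merged_masks):
  """Accuracy over non-padded examples only (illustrative sketch)."""
  real_index = np.where(merged_masks == 1)[0]        # indices of real examples
  predictions = np.argmax(merged_logits[real_index], axis=-1)
  correct = np.sum(np.equal(predictions, merged_labels[real_index]))
  total = np.shape(real_index)[-1]
  return float(correct) / float(total)


# Example with two real examples and one padded slot:
logits = np.array([[0.1, 0.9], [0.8, 0.2], [0.5, 0.5]])
labels = np.array([1, 0, 0])
masks = np.array([1, 1, 0])
print(masked_accuracy(logits, labels, masks))  # 1.0
```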
@@ -116,7 +116,8 @@ def run_evaluation(strategy,
     model: keras model object.
     step: current training step.
     eval_summary_writer: summary writer used to record evaluation metrics.
+  Returns:
+    A float metric, F1 score.
   """

   def _test_step_fn(inputs):
@@ -192,23 +193,23 @@ def run_evaluation(strategy,
   output_null_log_odds_file = os.path.join(input_meta_data["predict_dir"],
                                            "null_odds.json")

-  ret = squad_utils.write_predictions(
+  results = squad_utils.write_predictions(
       eval_examples, input_meta_data["eval_features"], cur_results,
       input_meta_data["n_best_size"], input_meta_data["max_answer_length"],
       output_prediction_file, output_nbest_file, output_null_log_odds_file,
       orig_data, input_meta_data["start_n_top"], input_meta_data["end_n_top"])

-  # Log current result
+  # Log current results.
   log_str = "Result | "
-  for key, val in ret.items():
+  for key, val in results.items():
     log_str += "{} {} | ".format(key, val)
   logging.info(log_str)

   if eval_summary_writer:
     with eval_summary_writer.as_default():
-      tf.summary.scalar("best_f1", ret["best_f1"], step=step)
-      tf.summary.scalar("best_exact", ret["best_exact"], step=step)
+      tf.summary.scalar("best_f1", results["best_f1"], step=step)
+      tf.summary.scalar("best_exact", results["best_exact"], step=step)
       eval_summary_writer.flush()
+  return results["best_f1"]


 def get_qaxlnet_model(model_config, run_config, start_n_top, end_n_top):
...
@@ -27,7 +27,7 @@ from absl import logging
 # pytype: disable=attribute-error
 # pylint: disable=g-bare-generic,unused-import
 import tensorflow as tf
-# Initialize TPU System.
+from official.modeling import model_training_utils
 from official.nlp.xlnet import data_utils
 from official.nlp import xlnet_modeling as modeling
 from typing import Any, Callable, Dict, Text, Optional
@@ -304,6 +304,18 @@ def train(
                             checkpoint_name.format(step=current_step))
   if test_input_fn:
     logging.info("Running final evaluation after training is complete.")
-    eval_fn(model, current_step, eval_summary_writer)
+    eval_metric = eval_fn(model, current_step, eval_summary_writer)
+
+  training_summary = {
+      "total_training_steps": total_training_steps,
+      "train_loss": _float_metric_value(train_loss_metric),
+  }
+  if train_metric:
+    training_summary["last_train_metrics"] = _float_metric_value(train_metric)
+  if test_input_fn:
+    # eval_metric is supposed to be a float.
+    training_summary["eval_metrics"] = eval_metric
+  model_training_utils.write_txt_summary(training_summary, model_dir)
+
   return model
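With this addition the XLNet training loop writes the same JSON-encoded `training_summary.txt` that the BERT training loop does, which is what lets `XLNetClassifyAccuracy._run_and_report_benchmark` read the stats back and check them against the accuracy bounds. A small sketch of that read path, with an illustrative model directory:

```python
import json
import os

import tensorflow as tf

model_dir = '/tmp/xlnet_imdb_model'  # illustrative path
summary_path = os.path.join(model_dir, 'training_summary.txt')

# Mirrors the read in _run_and_report_benchmark above.
with tf.io.gfile.GFile(summary_path, 'rb') as reader:
  summary = json.loads(reader.read().decode('utf-8'))
print(summary['train_loss'], summary.get('eval_metrics'))
```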